38 return XmlDocument (textToParse).getDocumentElement();
41 std::unique_ptr<XmlElement> parseXML (
const String& textToParse)
46 std::unique_ptr<XmlElement> parseXML (
const File& file)
48 return XmlDocument (file).getDocumentElement();
51 std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const String& textToParse, StringRef requiredTag)
53 return XmlDocument (textToParse).getDocumentElementIfTagMatches (requiredTag);
56 std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const File& file, StringRef requiredTag)
58 return XmlDocument (file).getDocumentElementIfTagMatches (requiredTag);
63 inputSource.reset (newSource);
68 ignoreEmptyTextElements = shouldBeIgnored;
71 namespace XmlIdentifierChars
73 static bool isIdentifierCharSlow (juce_wchar c) noexcept
76 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
79 static bool isIdentifierChar (juce_wchar c) noexcept
81 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
83 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (uint32) (1 << (c & 31))) != 0)
84 : isIdentifierCharSlow (c);
103 while (isIdentifierChar (*p))
112 if (originalText.
isEmpty() && inputSource !=
nullptr)
114 std::unique_ptr<InputStream> in (inputSource->createInputStream());
121 #if JUCE_STRING_UTF_TYPE == 8
125 auto* text =
static_cast<const char*
> (data.
getData());
147 return parseDocumentElement (originalText.
getCharPointer(), onlyReadOuterDocumentElement);
153 if (xml->hasTagName (requiredTag))
164 void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
167 errorOccurred = ! carryOn;
170 String XmlDocument::getFileContents (
const String& filename)
const
172 if (inputSource !=
nullptr)
174 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
177 return in->readEntireStreamAsString();
183 juce_wchar XmlDocument::readNextChar() noexcept
197 bool onlyReadOuterDocumentElement)
200 errorOccurred =
false;
202 needToLoadDTD =
true;
204 if (textToParse.isEmpty())
206 lastError =
"not enough input";
208 else if (! parseHeader())
210 lastError =
"malformed header";
212 else if (! parseDTD())
214 lastError =
"malformed DTD";
219 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
228 bool XmlDocument::parseHeader()
230 skipNextWhiteSpace();
236 if (headerEnd.isEmpty())
240 auto encoding = String (input, headerEnd)
241 .fromFirstOccurrenceOf (
"encoding",
false,
true)
242 .fromFirstOccurrenceOf (
"=",
false,
false)
243 .fromFirstOccurrenceOf (
"\"",
false,
false)
244 .upToFirstOccurrenceOf (
"\"",
false,
false)
254 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
257 input = headerEnd + 2;
258 skipNextWhiteSpace();
264 bool XmlDocument::parseDTD()
269 auto dtdStart = input;
271 for (
int n = 1; n > 0;)
273 auto c = readNextChar();
284 dtdText = String (dtdStart, input - 1).
trim();
290 void XmlDocument::skipNextWhiteSpace()
309 auto closeComment = input.
indexOf (CharPointer_ASCII (
"-->"));
311 if (closeComment < 0)
317 input += closeComment + 3;
324 auto closeBracket = input.
indexOf (CharPointer_ASCII (
"?>"));
326 if (closeBracket < 0)
332 input += closeBracket + 2;
341 void XmlDocument::readQuotedString (String& result)
343 auto quote = readNextChar();
347 auto c = readNextChar();
364 auto character = *input;
366 if (character == quote)
368 result.appendCharPointer (start, input);
373 if (character ==
'&')
375 result.appendCharPointer (start, input);
381 setLastError (
"unmatched quotes",
false);
392 XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
394 XmlElement* node =
nullptr;
395 skipNextWhiteSpace();
403 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
405 if (endOfToken == input)
408 skipNextWhiteSpace();
409 endOfToken = XmlIdentifierChars::findEndOfToken (input);
411 if (endOfToken == input)
413 setLastError (
"tag name missing",
false);
418 node =
new XmlElement (input, endOfToken);
420 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
425 skipNextWhiteSpace();
429 if (c ==
'/' && input[1] ==
'>')
440 if (alsoParseSubElements)
441 readChildElements (*node);
447 if (XmlIdentifierChars::isIdentifierChar (c))
449 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
451 if (attNameEnd != input)
453 auto attNameStart = input;
455 skipNextWhiteSpace();
457 if (readNextChar() ==
'=')
459 skipNextWhiteSpace();
460 auto nextChar = *input;
462 if (nextChar ==
'"' || nextChar ==
'\'')
464 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
465 readQuotedString (newAtt->value);
466 attributeAppender.append (newAtt);
472 setLastError (
"expected '=' after attribute '"
473 + String (attNameStart, attNameEnd) +
"'",
false);
481 setLastError (
"illegal character found in " + node->getTagName() +
": '" + c +
"'",
false);
491 void XmlDocument::readChildElements (XmlElement& parent)
493 LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
497 auto preWhitespaceInput = input;
498 skipNextWhiteSpace();
502 setLastError (
"unmatched tags",
false);
513 auto closeTag = input.
indexOf ((juce_wchar)
'>');
516 input += closeTag + 1;
524 auto inputStart = input;
532 setLastError (
"unterminated CDATA section",
false);
537 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
550 if (
auto* n = readNextElement (
true))
551 childAppender.append (n);
558 input = preWhitespaceInput;
559 MemoryOutputStream textElementContent;
560 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
568 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
571 auto closeComment = input.
indexOf (CharPointer_ASCII (
"-->"));
573 if (closeComment < 0)
575 setLastError (
"unterminated comment",
false);
580 input += closeComment + 3;
589 setLastError (
"unmatched tags",
false);
599 if (entity.startsWithChar (
'<') && entity [1] != 0)
601 auto oldInput = input;
602 auto oldOutOfData = outOfData;
604 input = entity.getCharPointer();
607 while (
auto* n = readNextElement (
true))
608 childAppender.append (n);
611 outOfData = oldOutOfData;
615 textElementContent << entity;
616 contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
623 auto nextChar = *input;
625 if (nextChar ==
'\r')
629 if (input[1] ==
'\n')
633 if (nextChar ==
'<' || nextChar ==
'&')
638 setLastError (
"unmatched tags",
false);
643 textElementContent.appendUTF8Char (nextChar);
649 if (contentShouldBeUsed)
655 void XmlDocument::readEntity (String& result)
685 else if (*input ==
'#')
690 if (*input ==
'x' || *input ==
'X')
695 while (input[0] !=
';')
699 if (hexValue < 0 || ++numChars > 8)
701 setLastError (
"illegal escape sequence",
true);
705 charCode = (charCode << 4) | hexValue;
711 else if (input[0] >=
'0' && input[0] <=
'9')
715 while (input[0] !=
';')
719 setLastError (
"illegal escape sequence",
true);
723 charCode = charCode * 10 + ((int) input[0] -
'0');
731 setLastError (
"illegal escape sequence",
true);
736 result << (juce_wchar) charCode;
740 auto entityNameStart = input;
741 auto closingSemiColon = input.
indexOf ((juce_wchar)
';');
743 if (closingSemiColon < 0)
750 input += closingSemiColon + 1;
751 result += expandExternalEntity (String (entityNameStart, (
size_t) closingSemiColon));
756 String XmlDocument::expandEntity (
const String& ent)
768 if (char1 ==
'x' || char1 ==
'X')
771 if (char1 >=
'0' && char1 <=
'9')
774 setLastError (
"illegal escape sequence",
false);
778 return expandExternalEntity (ent);
// Resolves a named entity by searching tokenisedDTD for a token equal to `entity`.
// NOTE: this listing is missing lines (braces, some conditions and the return
// statements), so the comments below describe only the statements that are visible.
781 String XmlDocument::expandExternalEntity (
const String& entity)
// If the last two DTD tokens are "system" followed by a quoted string, the quoted
// token is passed to getFileContents() and the token list is rebuilt from the result.
790 if (tokenisedDTD[tokenisedDTD.
size() - 2].equalsIgnoreCase (
"system")
791 && tokenisedDTD[tokenisedDTD.
size() - 1].isQuotedString())
793 auto fn = tokenisedDTD[tokenisedDTD.
size() - 1];
795 tokenisedDTD.
clear();
796 tokenisedDTD.
addTokens (getFileContents (fn),
true);
800 tokenisedDTD.
clear();
807 if (closeBracket > openBracket)
809 closeBracket),
true);
// Iterate over the tokens from last to first. A token that starts with '%' and ends
// with ';' has the text between those two characters passed to getParameterEntity();
// the result is tokenised into newToks, whose entries are inserted at index i.
813 for (
int i = tokenisedDTD.
size(); --i >= 0;)
815 if (tokenisedDTD[i].startsWithChar (
'%')
816 && tokenisedDTD[i].endsWithChar (
';'))
818 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
820 newToks.addTokens (parsed,
true);
824 for (
int j = newToks.size(); --j >= 0;)
825 tokenisedDTD.
insert (i, newToks[j]);
830 needToLoadDTD =
false;
// Search the token list for the requested entity name.
833 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
835 if (tokenisedDTD[i] == entity)
// The name must directly follow an "<!entity" token.
// NOTE(review): when i == 0 this reads tokenisedDTD[-1] - confirm that the
// array's operator[] tolerates out-of-range indices.
837 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
// The token after the name is taken as the value: trailing '>' characters are
// trimmed, then surrounding whitespace, then quotes.
839 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">").
trim().unquoted();
// Expand any '&...;' references nested inside the value.
842 auto ampersand = ent.indexOfChar (
'&');
844 while (ampersand >= 0)
// NOTE(review): `i` is the index into tokenisedDTD, not a position inside `ent`.
// The search for ';' (and the substring two statements below) presumably should
// start at `ampersand + 1` - confirm and fix against the full source.
846 auto semiColon = ent.indexOf (i + 1,
";");
850 setLastError (
"entity without terminating semi-colon",
false);
854 auto resolved = expandEntity (ent.substring (i + 1, semiColon));
856 ent = ent.substring (0, ampersand)
858 + ent.substring (semiColon + 1);
// NOTE(review): semiColon was measured before `ent` was rewritten above, so this
// start offset may not line up with the new string - verify.
860 ampersand = ent.indexOfChar (semiColon + 1,
'&');
// No matching declaration was found: report it with carryOn == true.
868 setLastError (
"unknown entity",
true);
872 String XmlDocument::getParameterEntity (
const String& entity)
874 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
876 if (tokenisedDTD[i] == entity
877 && tokenisedDTD [i - 1] ==
"%"
878 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
880 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">");
882 if (ent.equalsIgnoreCase (
"system"))
883 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
885 return ent.trim().unquoted();
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian).
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian).
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate on the data.
int compareIgnoreCaseUpTo(const CharPointer other, const int maxChars) const noexcept
Compares this string with another one, up to a specified number of characters.
juce_wchar getAndAdvance() noexcept
Returns the character that this pointer is currently pointing to, and then advances the pointer to point to the next character.
bool isEmpty() const noexcept
Returns true if this pointer is pointing to a null character.
int indexOf(const CharPointer stringToFind) const noexcept
Returns the character index of a substring, or -1 if it isn't found.
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM).
CharPointer_UTF8 findEndOfWhitespace() const noexcept
Returns a pointer to the first non-whitespace character in the string.
static int getHexDigitValue(juce_wchar digit) noexcept
Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit.
static bool isWhitespace(char character) noexcept
Checks whether a character is whitespace.
static bool isLetterOrDigit(char character) noexcept
Checks whether a character is alphabetic or numeric.
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
Returns a pointer to the first occurrence of a substring in a string.
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
Compares two null-terminated character strings, up to a given number of characters.
Represents a local file or directory.
Writes data to an internal memory buffer, which grows as required.
const void * getData() const noexcept
Returns a pointer to the data that has been written to the stream.
String toString() const
Attempts to detect the encoding of the data and convert it to a string.
size_t getDataSize() const noexcept
Returns the number of bytes of data that have been written to the stream.
int64 writeFromInputStream(InputStream &, int64 maxNumBytesToWrite) override
Reads data from an input stream and writes it to this stream.
virtual bool writeByte(char byte)
Writes a single byte to the stream.
void insert(int index, String stringToAdd)
Inserts a string into the array.
void clear()
Removes all elements from the array.
int size() const noexcept
Returns the number of strings in the array.
void trim()
Deletes any whitespace characters from the starts and ends of all the strings.
void remove(int index)
Removes a string from the array.
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
Breaks up a string into tokens and adds them to this array.
A simple class for holding temporary references to a string literal or String.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
int indexOfChar(juce_wchar characterToLookFor) const noexcept
Searches for a character inside this string.
String trim() const
Returns a copy of this string with any whitespace characters removed from the start and end.
bool isEmpty() const noexcept
Returns true if the string contains no characters.
void clear() noexcept
Resets this string to be empty.
int lastIndexOfChar(juce_wchar character) const noexcept
Searches for a character inside this string (working backwards from the end of the string).
String trimCharactersAtEnd(StringRef charactersToTrim) const
Returns a copy of this string, having removed a specified set of characters from its end.
static String charToString(juce_wchar character)
Creates a string from a single character.
String substring(int startIndex, int endIndex) const
Returns a subsection of the string.
CharPointer_UTF8 CharPointerType
This is the character encoding type used internally to store the string.
bool isNotEmpty() const noexcept
Returns true if the string contains at least one character.
Parses a text-based XML document and creates an XmlElement object from it.
const String & getLastParseError() const noexcept
Returns the parsing error that occurred the last time getDocumentElement was called.
std::unique_ptr< XmlElement > getDocumentElementIfTagMatches(StringRef requiredTag)
Does an inexpensive check to see whether the outer element has the given tag name, and then does a full parse if it matches.
std::unique_ptr< XmlElement > getDocumentElement(bool onlyReadOuterDocumentElement=false)
Creates an XmlElement object to represent the main document node.
XmlDocument(const String &documentText)
Creates an XmlDocument from the xml text.
static std::unique_ptr< XmlElement > parse(const File &file)
A handy static method that parses a file.
void setInputSource(InputSource *newSource) noexcept
Sets an input source object to use for parsing documents that reference external entities.
~XmlDocument()
Destructor.
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
Sets a flag to change the treatment of empty text elements.
static XmlElement * createTextElement(const String &text)
Creates a text element that can be added to a parent element.