// Copyright (c) 2023 Dominic Masters // // This software is released under the MIT License. // https://opensource.org/licenses/MIT #include "Xml.hpp" using namespace Dawn; bool_t Xml::isWhitespace(char_t c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; } Xml Xml::load(std::string data) { size_t j = 0; Xml xml; Xml::load(&xml, data, &j); return xml; } void Xml::load(Xml *xml, std::string data, size_t *j) { char_t c; int32_t level = 0; enum XmlParseState doing = XML_PARSE_STATE_DOING_NOTHING; enum XmlParseState doingBeforeComment; bool_t insideTag = false; std::string buffer = ""; std::string attrKey = ""; size_t i = *j; while(c = data[i++]) { switch(doing) { case XML_PARSE_STATE_DOING_NOTHING: if(c == '>') continue; if(c == '<') { if(data[i] == '!' && data[i+1] == '-' && data[i+2] == '-') { doingBeforeComment = doing; doing = XML_PARSE_STATE_PARSING_COMMENT; i += 3; } else if(insideTag) { i -= 1; auto child = new Xml(); Xml::load(child, data, &i); xml->children.push_back(child); doing = XML_PARSE_STATE_PARSING_CHILD; } else { doing = XML_PARSE_STATE_PARSING_TAG_NAME; level++; insideTag = true; } continue; } if(Xml::isWhitespace(c)) continue; doing = XML_PARSE_STATE_PARSING_VALUE; buffer += c; break; case XML_PARSE_STATE_PARSING_TAG_NAME: // Just keep reading until we either hit a space (end of the tag name) // or a closing tag value, either / or > if(Xml::isWhitespace(c) || c == '>' || c == '/') { xml->node = buffer; buffer = ""; if(c == '/') { level--; insideTag = false; doing = XML_PARSE_STATE_PARSING_CLOSE; } else { doing = c == '>' ? XML_PARSE_STATE_DOING_NOTHING : XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE; } continue; } buffer += c; break; case XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE: // Look until we hit either the end of a tag, or the attribute itself if(Xml::isWhitespace(c) || c == '>' || c == '/' || c == '=') { if(c == '>' || c == '/') { doing = XML_PARSE_STATE_DOING_NOTHING; if(c == '/') { level--; insideTag = false; doing = XML_PARSE_STATE_PARSING_CLOSE; } } else if(c == '=') { doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE; } else { doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE; } if(buffer.size() > 0) { attrKey = buffer; xml->attributes[buffer] = ""; buffer = ""; } continue; } buffer += c; break; case XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE: // Keep looking until we find a quote mark if(Xml::isWhitespace(c)) continue; if(c == '>' || c == '/') { doing = XML_PARSE_STATE_DOING_NOTHING; insideTag = false; continue; } if(c != '"') continue; doing = XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE; break; case XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE: // Parse the attribute value until we find a quote mark. if(c == '"') { doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE; xml->attributes[attrKey] = buffer; buffer = ""; continue; } buffer += c; break; case XML_PARSE_STATE_PARSING_VALUE: // Keep parsing child until we find a < for an opening/closing tag. if(c == '<') { // In HTML Spec there could be a child here but not in XML spec. doing = XML_PARSE_STATE_PARSING_CLOSE; xml->value = buffer; buffer = ""; continue; } buffer += c; break; case XML_PARSE_STATE_PARSING_CHILD: if(c == '<') { // Read ahead and confirm this is a close or not if(data[i] == '/') { doing = XML_PARSE_STATE_PARSING_CLOSE; continue; } if(data[i] == '!' && data[i+1] == '-' && data[i+2] == '-') { doingBeforeComment = doing; doing = XML_PARSE_STATE_PARSING_COMMENT; i += 3; continue; } // Likely another child. auto child = new Xml(); i -= 1; Xml::load(child, data, &i); xml->children.push_back(child); } if(Xml::isWhitespace(c)) continue; // In HTML Spec there's a chance for there to be a value here, but not // in the XML spec. break; case XML_PARSE_STATE_PARSING_CLOSE: // Just keep parsing until the tag closer finishes. if(c != '>') continue; doing = XML_PARSE_STATE_DOING_NOTHING; //TODO: Return index or something? *j = i; return; case XML_PARSE_STATE_PARSING_COMMENT: if(c != '-') continue; if(data[i] != '-') continue; if(data[i+1] != '>') continue; i += 2; doing = doingBeforeComment; break; default: break; } } *j = i; } std::vector Xml::getChildrenOfType(std::string type) { std::vector children; auto itChildren = this->children.begin(); while(itChildren != this->children.end()) { auto child = *itChildren; if(child->node == type) children.push_back(child); ++itChildren; } return children; } Xml * Xml::getFirstChildOfType(std::string type) { auto itChildren = this->children.begin(); while(itChildren != this->children.end()) { auto child = *itChildren; if(child->node == type) return child; ++itChildren; } return nullptr; } Xml::~Xml() { auto it = this->children.begin(); while(it != this->children.end()) { delete *it; ++it; } }