243 lines
6.6 KiB
C++
243 lines
6.6 KiB
C++
// Copyright (c) 2023 Dominic Masters
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT
|
|
|
|
#include "Xml.hpp"
|
|
|
|
using namespace Dawn;
|
|
|
|
/**
 * Tests whether a character is one of the four whitespace characters the
 * parser skips: space, carriage return, line feed or horizontal tab.
 *
 * @param c Character to test.
 * @return True if c is ' ', '\r', '\n' or '\t', otherwise false.
 */
bool_t Xml::isWhitespace(char_t c) {
  switch(c) {
    case ' ':
    case '\r':
    case '\n':
    case '\t':
      return true;

    default:
      return false;
  }
}
|
|
|
|
/**
 * Parses an XML document held in a string and returns the resulting node.
 *
 * Parsing starts at offset 0 of data and is delegated to the pointer based
 * overload of Xml::load.
 *
 * @param data String containing the XML text to parse.
 * @return The Xml node populated by the parser.
 */
Xml Xml::load(std::string data) {
  Xml root;
  size_t cursor = 0;// Read position, advanced by the parser as it consumes.
  Xml::load(&root, data, &cursor);
  return root;
}
|
|
|
|
/**
 * Parses one XML element out of data, starting at offset *j, and stores the
 * result in xml. Child elements are parsed by calling this function
 * recursively; each child is allocated with new and appended to
 * xml->children.
 *
 * The parser is a character driven state machine. It fills in:
 *  - xml->node        the tag name,
 *  - xml->attributes  one entry per attribute (empty string when an attribute
 *                     has no double quoted value),
 *  - xml->value       the text content, when the element holds text,
 *  - xml->children    the nested elements.
 *
 * Parsing stops when the element's closing '>' has been consumed, when the
 * end of data is reached, or when a NUL character is encountered.
 *
 * NOTE(review): data is taken by value, so every recursive call copies the
 * whole document. Changing that needs a signature change in the header.
 *
 * @param xml  Node to populate.
 * @param data Full XML text being parsed.
 * @param j    In: offset to begin parsing at. Out: offset of the first
 *             character that was not consumed; never larger than data.size()
 *             unless it already was on entry.
 */
void Xml::load(Xml *xml, std::string data, size_t *j) {
  char_t c;
  enum XmlParseState doing = XML_PARSE_STATE_DOING_NOTHING;
  enum XmlParseState doingBeforeComment = XML_PARSE_STATE_DOING_NOTHING;
  bool_t insideTag = false;
  std::string buffer = "";
  std::string attrKey = "";
  std::string bufferWhitespaces;
  bool_t valueIsInWhitespace = false;
  size_t i = *j;
  const size_t length = data.size();

  // Bounds checked lookahead relative to the read position; yields NUL when
  // the requested character lies beyond the end of the data.
  auto peek = [&data, &i, length](size_t offset) -> char_t {
    return (i + offset < length) ? data[i + offset] : '\0';
  };

  // The explicit bounds check keeps i <= length at all times. Without it, a
  // recursive call that ran off the end of the data handed back an index of
  // length + 1 and the caller then read past the string's terminator.
  while(i < length) {
    c = data[i++];

    // An embedded NUL ends parsing, exactly as it always has.
    if(c == '\0') break;

    switch(doing) {
      case XML_PARSE_STATE_DOING_NOTHING:
        if(c == '>') continue;
        if(c == '<') {
          if(peek(0) == '!' && peek(1) == '-' && peek(2) == '-') {
            // Start of a comment, skip over the "!--".
            doingBeforeComment = doing;
            doing = XML_PARSE_STATE_PARSING_COMMENT;
            i += 3;
          } else if(insideTag && peek(0) == '/') {
            // Closing tag directly after our own opening tag, e.g. <a></a>.
            // Previously this was parsed as a child with an empty name, which
            // also swallowed our closing tag and re-parented any siblings
            // that followed.
            doing = XML_PARSE_STATE_PARSING_CLOSE;
          } else if(insideTag) {
            // A nested element; rewind onto the '<' and let the recursive
            // call consume it.
            i -= 1;
            auto child = new Xml();
            Xml::load(child, data, &i);
            xml->children.push_back(child);
            doing = XML_PARSE_STATE_PARSING_CHILD;
          } else {
            // Our own opening tag.
            doing = XML_PARSE_STATE_PARSING_TAG_NAME;
            insideTag = true;
          }
          continue;
        }

        if(Xml::isWhitespace(c)) continue;

        // Anything else is the beginning of a text value.
        doing = XML_PARSE_STATE_PARSING_VALUE;
        buffer += c;
        break;

      case XML_PARSE_STATE_PARSING_TAG_NAME:
        // Just keep reading until we either hit a space (end of the tag name)
        // or a closing tag value, either / or >
        if(Xml::isWhitespace(c) || c == '>' || c == '/') {
          xml->node = buffer;
          buffer = "";
          if(c == '/') {
            insideTag = false;
            doing = XML_PARSE_STATE_PARSING_CLOSE;
          } else {
            doing = c == '>' ? XML_PARSE_STATE_DOING_NOTHING : XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE;
          }
          continue;
        }
        buffer += c;
        break;

      case XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE:
        // Look until we hit either the end of a tag, or the attribute itself
        if(Xml::isWhitespace(c) || c == '>' || c == '/' || c == '=') {
          if(c == '>' || c == '/') {
            doing = XML_PARSE_STATE_DOING_NOTHING;
            if(c == '/') {
              insideTag = false;
              doing = XML_PARSE_STATE_PARSING_CLOSE;
            }
          } else if(c == '=') {
            doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE;
          } else {
            doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE;
          }

          // Whatever was buffered is an attribute name; register it with an
          // empty value until (and unless) a quoted value follows.
          if(buffer.size() > 0) {
            attrKey = buffer;
            xml->attributes[buffer] = "";
            buffer = "";
          }
          continue;
        }
        buffer += c;
        break;

      case XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE:
        // Keep looking until we find a quote mark
        if(Xml::isWhitespace(c)) continue;
        if(c == '>' || c == '/') {
          doing = XML_PARSE_STATE_DOING_NOTHING;
          insideTag = false;
          continue;
        }

        if(c != '"') continue;
        doing = XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE;
        break;

      case XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE:
        // Parse the attribute value until we find a quote mark.
        if(c == '"') {
          doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE;
          xml->attributes[attrKey] = buffer;
          buffer = "";
          continue;
        }
        buffer += c;
        break;

      case XML_PARSE_STATE_PARSING_VALUE:
        // Keep parsing child until we find a < for an opening/closing tag.
        if(c == '<') {
          // In HTML Spec there could be a child here but not in XML spec.
          doing = XML_PARSE_STATE_PARSING_CLOSE;
          xml->value = buffer;
          buffer.clear();
          valueIsInWhitespace = false;
          bufferWhitespaces.clear();
          continue;
        }

        if(Xml::isWhitespace(c)) {
          // Whitespace is held back and only committed once a non-whitespace
          // character follows, which drops trailing whitespace. Within a
          // run, the first character is kept and later plain spaces are
          // discarded.
          if(!valueIsInWhitespace) {
            bufferWhitespaces.clear();
            bufferWhitespaces += c;
            valueIsInWhitespace = true;
          } else {
            if(c != ' ') bufferWhitespaces += c;
          }
          // TODO: I can maybe consider indentation here
        } else {
          if(valueIsInWhitespace) {
            buffer += bufferWhitespaces;
            valueIsInWhitespace = false;
          }
          buffer += c;
        }
        break;

      case XML_PARSE_STATE_PARSING_CHILD:
        if(c == '<') {
          // Read ahead and confirm this is a close or not
          if(peek(0) == '/') {
            doing = XML_PARSE_STATE_PARSING_CLOSE;
            continue;
          }

          if(peek(0) == '!' && peek(1) == '-' && peek(2) == '-') {
            doingBeforeComment = doing;
            doing = XML_PARSE_STATE_PARSING_COMMENT;
            i += 3;
            continue;
          }

          // Likely another child.
          auto child = new Xml();
          i -= 1;
          Xml::load(child, data, &i);
          xml->children.push_back(child);
        }

        if(Xml::isWhitespace(c)) continue;

        // In HTML Spec there's a chance for there to be a value here, but not
        // in the XML spec.
        break;

      case XML_PARSE_STATE_PARSING_CLOSE:
        // Just keep parsing until the tag closer finishes.
        if(c != '>') continue;
        doing = XML_PARSE_STATE_DOING_NOTHING;

        // Report how far we read so the caller can resume after this element.
        *j = i;
        return;

      case XML_PARSE_STATE_PARSING_COMMENT:
        // Skip everything up to and including the terminating "-->".
        if(c != '-') continue;
        if(peek(0) != '-') continue;
        if(peek(1) != '>') continue;
        i += 2;
        doing = doingBeforeComment;
        break;

      default:
        break;
    }
  }

  // Ran out of input before the element was closed.
  *j = i;
}
|
|
|
|
|
|
std::vector<Xml*> Xml::getChildrenOfType(std::string type) {
|
|
std::vector<Xml*> children;
|
|
auto itChildren = this->children.begin();
|
|
while(itChildren != this->children.end()) {
|
|
auto child = *itChildren;
|
|
if(child->node == type) children.push_back(child);
|
|
++itChildren;
|
|
}
|
|
return children;
|
|
}
|
|
|
|
/**
 * Finds the first direct child of this node whose node name equals type.
 *
 * @param type Node name to match against each child's node.
 * @return Pointer to the first matching child, or nullptr if none matches.
 */
Xml * Xml::getFirstChildOfType(std::string type) {
  for(auto candidate : this->children) {
    if(candidate->node == type) {
      return candidate;
    }
  }
  return nullptr;
}
|
|
|
|
|
|
/**
 * Destroys this node, deleting every child pointer held in children.
 *
 * NOTE(review): because the children are deleted here, two Xml objects
 * holding the same child pointers would delete them twice; confirm how the
 * header handles copying.
 */
Xml::~Xml() {
  for(auto child : this->children) {
    delete child;
  }
}