New XML System, first pass.
This commit is contained in:
@ -7,6 +7,12 @@
|
||||
|
||||
using namespace Dawn;
|
||||
|
||||
/**
 * Constructs an empty XML node.
 *
 * Every member is given a defined starting value: the node carries no text
 * and points at no child element. nodeType previously was left
 * uninitialised, so reading it before an explicit assignment was undefined
 * behaviour; it now defaults to XML_NODE_TYPE_TEXT. Callers are still
 * expected to set nodeType to whatever kind of node they are building.
 */
XmlNode::XmlNode() :
  nodeType(XML_NODE_TYPE_TEXT),
  value(),
  child(nullptr)
{
}
|
||||
|
||||
|
||||
/**
 * Tests whether a character is one of the four whitespace characters the
 * parser recognises: space, carriage return, line feed or horizontal tab.
 *
 * @param c Character to test.
 * @return True if c is one of those four characters, otherwise false.
 */
bool_t Xml::isWhitespace(char_t c) {
  switch(c) {
    case ' ':
    case '\r':
    case '\n':
    case '\t':
      return true;

    default:
      return false;
  }
}
|
||||
@ -26,9 +32,8 @@ void Xml::load(Xml *xml, std::string data, size_t *j) {
|
||||
bool_t insideTag = false;
|
||||
std::string buffer = "";
|
||||
std::string attrKey = "";
|
||||
std::string bufferWhitespaces;
|
||||
bool_t valueIsInWhitespace = false;
|
||||
size_t i = *j;
|
||||
struct XmlNode childNode;
|
||||
|
||||
while(c = data[i++]) {
|
||||
if(insideTag) {
|
||||
@ -39,26 +44,21 @@ void Xml::load(Xml *xml, std::string data, size_t *j) {
|
||||
case XML_PARSE_STATE_DOING_NOTHING:
|
||||
if(c == '>') continue;
|
||||
if(c == '<') {
|
||||
// Parsing comment?
|
||||
if(data[i] == '!' && data[i+1] == '-' && data[i+2] == '-') {
|
||||
doingBeforeComment = doing;
|
||||
doing = XML_PARSE_STATE_PARSING_COMMENT;
|
||||
i += 3;
|
||||
} else if(data[i] == '!' && !insideTag) {
|
||||
// Likely <!DOCTYPE ...>
|
||||
while((c = data[i++]) != '>') {
|
||||
// Nothing needs doing here right now, in future may support doctype
|
||||
}
|
||||
continue;
|
||||
} else if(insideTag) {
|
||||
if(data[i] == '/') {
|
||||
i -= 1;
|
||||
doing = XML_PARSE_STATE_PARSING_CHILD;
|
||||
} else {
|
||||
i -= 1;
|
||||
auto child = new Xml();
|
||||
Xml::load(child, data, &i);
|
||||
xml->children.push_back(child);
|
||||
doing = XML_PARSE_STATE_PARSING_CHILD;
|
||||
|
||||
|
||||
// Remove last char since we kinda already parsed it.
|
||||
xml->innerXml += child->outerXml;
|
||||
xml->outerXml = xml->outerXml.substr(0, xml->outerXml.size()-1);
|
||||
xml->outerXml += child->outerXml;
|
||||
doing = XML_PARSE_STATE_PARSING_CLOSE;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
doing = XML_PARSE_STATE_PARSING_TAG_NAME;
|
||||
@ -69,8 +69,8 @@ void Xml::load(Xml *xml, std::string data, size_t *j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
xml->innerXml += c;
|
||||
if(Xml::isWhitespace(c)) continue;
|
||||
if(insideTag) xml->innerXml += c;
|
||||
if(Xml::isWhitespace(c)) continue;// NEEDS TO GO?
|
||||
doing = XML_PARSE_STATE_PARSING_VALUE;
|
||||
buffer += c;
|
||||
break;
|
||||
@ -86,7 +86,7 @@ void Xml::load(Xml *xml, std::string data, size_t *j) {
|
||||
insideTag = false;
|
||||
doing = XML_PARSE_STATE_PARSING_CLOSE;
|
||||
} else {
|
||||
doing = c == '>' ? XML_PARSE_STATE_DOING_NOTHING : XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE;
|
||||
doing = c == '>' ? XML_PARSE_STATE_PARSING_VALUE : XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -98,7 +98,7 @@ void Xml::load(Xml *xml, std::string data, size_t *j) {
|
||||
// Look until we hit either the end of a tag, or the attribute itself
|
||||
if(Xml::isWhitespace(c) || c == '>' || c == '/' || c == '=') {
|
||||
if(c == '>' || c == '/') {
|
||||
doing = XML_PARSE_STATE_DOING_NOTHING;
|
||||
doing = XML_PARSE_STATE_PARSING_VALUE;
|
||||
if(c == '/') {
|
||||
level--;
|
||||
insideTag = false;
|
||||
@ -149,116 +149,86 @@ void Xml::load(Xml *xml, std::string data, size_t *j) {
|
||||
case XML_PARSE_STATE_PARSING_VALUE:
|
||||
// Keep parsing child until we find a < for an opening/closing tag.
|
||||
if(c == '<' && !(data[i] == '<' || data[i-2] == '<')) {
|
||||
if(buffer.size() > 0) {
|
||||
childNode.nodeType = XML_NODE_TYPE_TEXT;
|
||||
childNode.value = buffer;
|
||||
xml->childNodes.push_back(childNode);
|
||||
}
|
||||
|
||||
// Are we parsing the close tag, or parsing a child?
|
||||
if(data[i] == '/') {
|
||||
// In HTML Spec there could be a child here but not in XML spec.
|
||||
doing = XML_PARSE_STATE_PARSING_CLOSE;
|
||||
xml->value = buffer;
|
||||
xml->textContent = buffer;
|
||||
buffer.clear();
|
||||
valueIsInWhitespace = false;
|
||||
bufferWhitespaces.clear();
|
||||
continue;
|
||||
}
|
||||
|
||||
std::cout << "Detected unsupported use of a child within a node value, e.g. <div>Hello <b>world</b> how are you?</div>" << std::endl;
|
||||
throw "Test";
|
||||
continue;
|
||||
}
|
||||
|
||||
xml->innerXml += c;
|
||||
|
||||
if(Xml::isWhitespace(c)) {
|
||||
if(!valueIsInWhitespace) {
|
||||
bufferWhitespaces.clear();
|
||||
bufferWhitespaces += c;
|
||||
valueIsInWhitespace = true;
|
||||
} else {
|
||||
if(c != ' ') bufferWhitespaces += c;
|
||||
}
|
||||
// TODO: I can maybe consider indentation here
|
||||
} else {
|
||||
if(valueIsInWhitespace) {
|
||||
buffer += bufferWhitespaces;
|
||||
valueIsInWhitespace = false;
|
||||
}
|
||||
|
||||
if(c == '&') {
|
||||
// Handle special characters. First read ahead to nearest semicolon OR
|
||||
// nearest closing tag.
|
||||
std::string sc;
|
||||
while(c = data[i++]) {
|
||||
xml->innerXml += c;
|
||||
if(c == ';') break;
|
||||
if(c == '<') assertUnreachable();//Invalid XML
|
||||
sc += c;
|
||||
}
|
||||
|
||||
if(valueIsInWhitespace) {
|
||||
buffer += bufferWhitespaces;
|
||||
valueIsInWhitespace = false;
|
||||
}
|
||||
|
||||
if(sc == "lt") {
|
||||
buffer += '<';
|
||||
} else if(sc == "gt") {
|
||||
buffer += '>';
|
||||
} else if(sc == "amp") {
|
||||
buffer += '&';
|
||||
} else if(sc == "apos") {
|
||||
buffer += '\'';
|
||||
} else if(sc == "quot") {
|
||||
buffer += '"';
|
||||
} else if(sc == "nbsp") {
|
||||
buffer += ' ';
|
||||
} else {
|
||||
// Try parse as integer
|
||||
if(sc.size() > 1 && sc[0] == '#') {
|
||||
int code = std::stoi(sc.substr(1));
|
||||
buffer += (char)code;
|
||||
} else {
|
||||
std::cout << "Unknown Special character: " << sc << std::endl;
|
||||
assertUnreachable();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
buffer += c;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case XML_PARSE_STATE_PARSING_CHILD:
|
||||
if(c == '<') {
|
||||
// Read ahead and confirm this is a close or not
|
||||
if(data[i] == '/') {
|
||||
doing = XML_PARSE_STATE_PARSING_CLOSE;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(data[i] == '!' && data[i+1] == '-' && data[i+2] == '-') {
|
||||
} else if(data[i] == '!' && data[i+1] == '-' && data[i+2] == '-') {
|
||||
doingBeforeComment = doing;
|
||||
doing = XML_PARSE_STATE_PARSING_COMMENT;
|
||||
i += 3;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Likely another child.
|
||||
auto child = new Xml();
|
||||
// Parsing child
|
||||
i -= 1;
|
||||
|
||||
// @deprecated
|
||||
auto child = new Xml();
|
||||
Xml::load(child, data, &i);
|
||||
xml->children.push_back(child);
|
||||
|
||||
childNode = XmlNode();
|
||||
childNode.nodeType = XML_NODE_TYPE_ELEMENT;
|
||||
childNode.child = child;
|
||||
xml->childNodes.push_back(childNode);
|
||||
|
||||
// Remove last char since we kinda already parsed it.
|
||||
xml->innerXml += child->outerXml;
|
||||
xml->outerXml = xml->outerXml.substr(0, xml->outerXml.size()-1);
|
||||
xml->outerXml += child->outerXml;
|
||||
}
|
||||
|
||||
if(Xml::isWhitespace(c)) {
|
||||
xml->innerXml += c;
|
||||
buffer.clear();
|
||||
continue;
|
||||
}
|
||||
|
||||
// In HTML Spec there's a chance for there to be a value here, but not
|
||||
// in the XML spec.
|
||||
xml->innerXml += c;
|
||||
|
||||
if(c == '&') {
|
||||
// Handle special characters. First read ahead to nearest semicolon OR
|
||||
// nearest closing tag.
|
||||
std::string sc;
|
||||
while(c = data[i++]) {
|
||||
xml->innerXml += c;
|
||||
if(c == ';') break;
|
||||
if(c == '<') assertUnreachable();//Invalid XML
|
||||
sc += c;
|
||||
}
|
||||
|
||||
if(sc == "lt") {
|
||||
buffer += '<';
|
||||
} else if(sc == "gt") {
|
||||
buffer += '>';
|
||||
} else if(sc == "amp") {
|
||||
buffer += '&';
|
||||
} else if(sc == "apos") {
|
||||
buffer += '\'';
|
||||
} else if(sc == "quot") {
|
||||
buffer += '"';
|
||||
} else if(sc == "nbsp") {
|
||||
buffer += ' ';
|
||||
} else {
|
||||
// Try parse as integer
|
||||
if(sc.size() > 1 && sc[0] == '#') {
|
||||
int code = std::stoi(sc.substr(1));
|
||||
buffer += (char)code;
|
||||
} else {
|
||||
std::cout << "Unknown Special character: " << sc << std::endl;
|
||||
assertUnreachable();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
buffer += c;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case XML_PARSE_STATE_PARSING_CLOSE:
|
||||
|
@ -17,11 +17,18 @@ namespace Dawn {
|
||||
XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE,
|
||||
XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE,
|
||||
XML_PARSE_STATE_PARSING_VALUE,
|
||||
XML_PARSE_STATE_PARSING_CHILD,
|
||||
XML_PARSE_STATE_PARSING_CLOSE,
|
||||
XML_PARSE_STATE_PARSING_COMMENT
|
||||
};
|
||||
|
||||
class Xml;
|
||||
struct XmlNode;
|
||||
|
||||
/**
 * Discriminator for the kind of content an XmlNode holds.
 */
enum XmlNodeType {
  /** The node holds text. */
  XML_NODE_TYPE_TEXT = 0,

  /** The node refers to a child element. */
  XML_NODE_TYPE_ELEMENT = 1
};
|
||||
|
||||
class Xml {
|
||||
protected:
|
||||
static bool_t isWhitespace(char_t c);
|
||||
@ -31,10 +38,13 @@ namespace Dawn {
|
||||
static void load(Xml *xml, std::string data, size_t *j);
|
||||
|
||||
std::string node;
|
||||
std::string value;
|
||||
std::string innerXml;
|
||||
std::string outerXml;
|
||||
std::string textContent;
|
||||
std::map<std::string, std::string> attributes;
|
||||
std::vector<struct XmlNode> childNodes;
|
||||
|
||||
// @deprecated
|
||||
std::vector<Xml*> children;
|
||||
|
||||
std::vector<Xml*> getChildrenOfType(std::string type);
|
||||
@ -42,4 +52,12 @@ namespace Dawn {
|
||||
|
||||
~Xml();
|
||||
};
|
||||
|
||||
struct XmlNode {
|
||||
enum XmlNodeType nodeType;
|
||||
std::string value;
|
||||
Xml *child;
|
||||
|
||||
XmlNode();
|
||||
};
|
||||
}
|
Reference in New Issue
Block a user