From 97d2de2cae02324e22bcb5e2e73b1bcd0959ae1e Mon Sep 17 00:00:00 2001
From: Dominic Masters <dominic@domsplace.com>
Date: Tue, 7 Feb 2023 21:22:57 -0800
Subject: [PATCH] Updated locale gen

---
 CMakeLists.txt                                |   4 +-
 src/dawn/CMakeLists.txt                       |   7 +-
 src/dawnshared/CMakeLists.txt                 |   9 +
 .../assert/CMakeLists.txt                     |   0
 src/{dawn => dawnshared}/assert/assert.cpp    |   0
 src/{dawn => dawnshared}/assert/assert.hpp    |   2 +-
 src/dawnshared/dawnsharedlibs.hpp             |   4 +-
 src/dawnshared/util/array.hpp                 |   2 +-
 .../locale/languagegen/CMakeLists.txt         |   5 +
 .../locale/languagegen/LanguageGen.cpp        |  99 ++++----
 .../locale/languagegen/LanguageGen.hpp        |   5 +-
 src/dawntools/util/XmlNew.cpp                 | 223 ++++++++++++++++++
 src/dawntools/util/XmlNew.hpp                 |  43 ++++
 13 files changed, 343 insertions(+), 60 deletions(-)
 rename src/{dawn => dawnshared}/assert/CMakeLists.txt (100%)
 rename src/{dawn => dawnshared}/assert/assert.cpp (100%)
 rename src/{dawn => dawnshared}/assert/assert.hpp (97%)
 create mode 100644 src/dawntools/util/XmlNew.cpp
 create mode 100644 src/dawntools/util/XmlNew.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 86b327ff..963dcdd3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,9 +4,9 @@
 # https://opensource.org/licenses/MIT
 
 cmake_minimum_required(VERSION 3.13)
-set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD 11) # CMake has no C standard level "20"; 11 is the newest one CMake 3.13 knows.
 set(CMAKE_C_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/")
 
diff --git a/src/dawn/CMakeLists.txt b/src/dawn/CMakeLists.txt
index 6630eb9d..fb66e0fd 100644
--- a/src/dawn/CMakeLists.txt
+++ b/src/dawn/CMakeLists.txt
@@ -17,8 +17,13 @@ target_include_directories(${DAWN_TARGET_NAME}
     ${CMAKE_CURRENT_LIST_DIR}
 )
 
+
+target_sources(${DAWN_TARGET_NAME}
+  PRIVATE
+    ${DAWN_SHARED_SOURCES}
+)
+
 # Subdirs
-add_subdirectory(assert)
 add_subdirectory(asset)
 add_subdirectory(display)
 add_subdirectory(input)
diff --git a/src/dawnshared/CMakeLists.txt b/src/dawnshared/CMakeLists.txt
index 203c8ff9..08003e0e 100644
--- a/src/dawnshared/CMakeLists.txt
+++ b/src/dawnshared/CMakeLists.txt
@@ -8,6 +8,15 @@ set(
   DAWN_SHARED_INCLUDES
     ${CMAKE_CURRENT_LIST_DIR}
     
+  CACHE INTERNAL
+    ${DAWN_CACHE_TARGET}
+)
+
+set(D ${CMAKE_CURRENT_LIST_DIR})
+set(
+  DAWN_SHARED_SOURCES
+    ${D}/assert/assert.cpp
+
   CACHE INTERNAL
     ${DAWN_CACHE_TARGET}
 )
\ No newline at end of file
diff --git a/src/dawn/assert/CMakeLists.txt b/src/dawnshared/assert/CMakeLists.txt
similarity index 100%
rename from src/dawn/assert/CMakeLists.txt
rename to src/dawnshared/assert/CMakeLists.txt
diff --git a/src/dawn/assert/assert.cpp b/src/dawnshared/assert/assert.cpp
similarity index 100%
rename from src/dawn/assert/assert.cpp
rename to src/dawnshared/assert/assert.cpp
diff --git a/src/dawn/assert/assert.hpp b/src/dawnshared/assert/assert.hpp
similarity index 97%
rename from src/dawn/assert/assert.hpp
rename to src/dawnshared/assert/assert.hpp
index 879771cb..d8592812 100644
--- a/src/dawn/assert/assert.hpp
+++ b/src/dawnshared/assert/assert.hpp
@@ -6,7 +6,7 @@
  */
 
 #pragma once
-#include "dawnlibs.hpp"
+#include "dawnsharedlibs.hpp"
 
 #define ASSERTS_ENABLED 1
 
diff --git a/src/dawnshared/dawnsharedlibs.hpp b/src/dawnshared/dawnsharedlibs.hpp
index 5a0a039e..55cde3de 100644
--- a/src/dawnshared/dawnsharedlibs.hpp
+++ b/src/dawnshared/dawnsharedlibs.hpp
@@ -19,6 +19,7 @@ extern "C" {
   #include <float.h>
 
   typedef bool bool_t;
+  typedef char char_t;
 }
 
 #include <vector>
@@ -28,4 +29,5 @@ extern "C" {
 #include <array>
 #include <memory>
 #include <algorithm>
-#include <sstream>
\ No newline at end of file
+#include <sstream>
+#include <string>
\ No newline at end of file
diff --git a/src/dawnshared/util/array.hpp b/src/dawnshared/util/array.hpp
index ab64da5a..8b53ec7d 100644
--- a/src/dawnshared/util/array.hpp
+++ b/src/dawnshared/util/array.hpp
@@ -4,7 +4,7 @@
 // https://opensource.org/licenses/MIT
 
 #pragma once
-#include "dawnlibs.hpp"
+#include "dawnsharedlibs.hpp"
 #include "assert/assert.hpp"
 
 namespace Dawn {
diff --git a/src/dawntools/locale/languagegen/CMakeLists.txt b/src/dawntools/locale/languagegen/CMakeLists.txt
index 1589d8e0..a209a524 100644
--- a/src/dawntools/locale/languagegen/CMakeLists.txt
+++ b/src/dawntools/locale/languagegen/CMakeLists.txt
@@ -8,18 +8,23 @@ project(languagegen VERSION 2.0)
 add_executable(languagegen)
 target_sources(languagegen
   PRIVATE
+    ${DAWN_SHARED_SOURCES}
     LanguageGen.cpp
     ../../util/DawnTool.cpp
+    ../../util/XmlNew.cpp
     ../../util/file.cpp
     ../../util/csv.cpp
     ../../util/xml.cpp
 )
+
 target_include_directories(languagegen
   PUBLIC
     ${DAWN_SHARED_INCLUDES}
     ${CMAKE_CURRENT_LIST_DIR}/../../
     ${CMAKE_CURRENT_LIST_DIR}
 )
+
+
 target_link_libraries(languagegen
   PUBLIC
     ${DAWN_BUILD_HOST_LIBS}
diff --git a/src/dawntools/locale/languagegen/LanguageGen.cpp b/src/dawntools/locale/languagegen/LanguageGen.cpp
index 9b44ec96..592076ce 100644
--- a/src/dawntools/locale/languagegen/LanguageGen.cpp
+++ b/src/dawntools/locale/languagegen/LanguageGen.cpp
@@ -14,7 +14,7 @@ LanguageGen::LanguageGen(const int argc, const char *argv[]) :
 {
 }
 
-int32_t LanguageGen::start() {
+int32_t LanguageGen::start() {  
   if(this->args.size() != 3) {
     std::cout << "Invalid number of arguments provided to language gen!" << std::endl;
     return 1;
@@ -32,53 +32,47 @@ int32_t LanguageGen::start() {
   if(buffer == NULL) {
     std::cout << "Failed to allocate memory for locale string XML" << std::endl;
     fclose(fileIn);
-    return 1;
+    return 1; 
   }
 
   assetReadString(fileIn, buffer);
   fclose(fileIn);
-  
-  xml_t xml;
-  xmlLoad(&xml, buffer);
+  auto xml = Xml::load(std::string(buffer));
   free(buffer);
-  
+
   // Begin parsing. Start by looking for the <language> tags
   std::vector<std::string> languages;
-  for(int32_t i = 0; i < xml.childrenCount; i++) {
-    auto c = xml.children + i;
-    if(std::string(c->node) != "language") continue;
-    auto attrName = xmlGetAttributeByName(c, "name");
-    
-    if(attrName == -1) {
-      std::cout << "Missing name param on language node" << std::endl;
-      xmlDispose(&xml);
-      return 1;
+  auto itChildren = xml.children.begin();
+  while(itChildren != xml.children.end()) {
+    auto child = *itChildren;
+    if(child->node == "language") {
+      auto attrName = child->attributes.find("name");
+      if(attrName == child->attributes.end()) {
+        std::cout << "Missing name param on language node" << std::endl;
+        return 1;
+      }
+      languages.push_back(attrName->second);
     }
-    languages.push_back(std::string(c->attributeDatas[attrName]));
+    ++itChildren;
   }
 
   // Now begin actually parsing
   std::map<std::string, std::vector<struct LanguageString>> strings;
-  for(int32_t i = 0; i < xml.childrenCount; i++) {
-    auto c = xml.children + i;
-    if(std::string(c->node) == "group") {
-      auto ret = this->parseGroup(c, "", &strings);
-      if(ret != 0) {
-        xmlDispose(&xml);
-        return ret;
-      }
-    } else if(std::string(c->node) == "string") {
+  itChildren = xml.children.begin();
+  while(itChildren != xml.children.end()) {
+    auto child = *itChildren;
+    if(child->node == "group") {
+      auto ret = this->parseGroup(child, "", &strings);
+      if(ret != 0) return ret;
+    } else if(child->node == "string") {
       std::cout << "String cannot be a root node" << std::endl;
-      xmlDispose(&xml);
       return 1;
     }
+    ++itChildren;
   }
-
-  xmlDispose(&xml);
-
+  
   // Now we validate each lang has each key.
   std::vector<std::string> keys;
-
   auto it = strings.begin();
   while(it != strings.end()) {
     auto it2 = it->second.begin();
@@ -98,7 +92,7 @@ int32_t LanguageGen::start() {
   while(it != strings.end()) {
     std::vector<std::string> itKeys;
 
-    std::string bufferOut = "";
+    std::string bufferOut;
 
     auto it2 = it->second.begin();
     while(it2 != it->second.end()) {
@@ -119,8 +113,8 @@ int32_t LanguageGen::start() {
       return 1;
     }
 
-    const char *strOut = bufferOut.c_str();
-    fwrite(strOut, sizeof(char), strlen(strOut), fileOut);
+    const char_t *strOut = bufferOut.c_str();
+    fwrite(strOut, sizeof(char_t), strlen(strOut), fileOut);
     fclose(fileOut);
 
     auto it3 = keys.begin();
@@ -144,54 +138,55 @@ int32_t LanguageGen::start() {
 }
 
 int32_t LanguageGen::parseString(
-  xml_t *stringNode,
+  Xml *stringNode,
   std::string key,
   std::map<std::string,std::vector<struct LanguageString>> *strings
 ) {
-  auto attrLang = xmlGetAttributeByName(stringNode, "lang");
-  if(attrLang == -1) {
+  auto attrLang = stringNode->attributes.find("lang");
+  if(attrLang == stringNode->attributes.end()) {
     std::cout << "String is missing lang parameter." << std::endl;
     return -1;
   }
 
-  std::string lang(stringNode->attributeDatas[attrLang]);
   struct LanguageString str;
   str.key = key;
-  str.value = std::string(stringNode->value);
+  str.value = stringNode->value;
 
-  auto existing = (*strings).find(lang);
+  auto existing = (*strings).find(attrLang->second);
   if(existing == (*strings).end()) {
-    (*strings).insert(std::make_pair(lang, std::vector<struct LanguageString>()));
+    (*strings).insert(std::make_pair(attrLang->second, std::vector<struct LanguageString>()));
   }
-  (*strings)[lang].push_back(str);
+  (*strings)[attrLang->second].push_back(str);
   return 0;
 }
 
 int32_t LanguageGen::parseGroup(
-  xml_t *groupNode,
+  Xml *groupNode,
   std::string key,
-  std::map<std::string,std::vector<struct LanguageString>> *strings
+  std::map<std::string, std::vector<struct LanguageString>> *strings
 ) {
   int32_t ret;
 
-  auto attrKey = xmlGetAttributeByName(groupNode, "key");
-  if(attrKey == -1) {
+  auto attrKey = groupNode->attributes.find("key");
+  if(attrKey == groupNode->attributes.end()) {
     std::cout << "Group node is missing key" << std::endl;
     return 1;
   }
 
   if(key.size() > 0) key += ".";
-  key += std::string(groupNode->attributeDatas[attrKey]);
+  key += attrKey->second;
 
-  for(int32_t i = 0; i < groupNode->childrenCount; i++) {
-    auto c = groupNode->children + i;
-    if(std::string(c->node) == "string") {
-      ret = this->parseString(c, key, strings);
+  auto itChildren = groupNode->children.begin();
+  while(itChildren != groupNode->children.end()) {
+    auto child = *itChildren;
+    if(child->node == "string") {
+      ret = this->parseString(child, key, strings);
       if(ret != 0) return ret;
-    } else if(std::string(c->node) == "group") {
-      ret = this->parseGroup(c, key, strings);
+    } else if(child->node == "group") {
+      ret = this->parseGroup(child, key, strings);
       if(ret != 0) return ret;
     }
+    ++itChildren;
   }
 
   return 0;
diff --git a/src/dawntools/locale/languagegen/LanguageGen.hpp b/src/dawntools/locale/languagegen/LanguageGen.hpp
index d0331471..6651360c 100644
--- a/src/dawntools/locale/languagegen/LanguageGen.hpp
+++ b/src/dawntools/locale/languagegen/LanguageGen.hpp
@@ -5,6 +5,7 @@
 
 #pragma once
 #include "util/DawnTool.hpp"
+#include "util/XmlNew.hpp"
 
 namespace Dawn {
   struct LanguageString {
@@ -15,13 +16,13 @@ namespace Dawn {
   class LanguageGen : public DawnTool {
     protected:
       int32_t parseGroup(
-        xml_t *node,
+        Xml *node,
         std::string key,
         std::map<std::string, std::vector<struct LanguageString>> *strings
       );
 
       int32_t parseString(
-        xml_t *node,
+        Xml *node,
         std::string key,
         std::map<std::string, std::vector<struct LanguageString>> *strings
       );
diff --git a/src/dawntools/util/XmlNew.cpp b/src/dawntools/util/XmlNew.cpp
new file mode 100644
index 00000000..f4a9dae6
--- /dev/null
+++ b/src/dawntools/util/XmlNew.cpp
@@ -0,0 +1,248 @@
+// Copyright (c) 2023 Dominic Masters
+//
+// This software is released under the MIT License.
+// https://opensource.org/licenses/MIT
+
+#include "XmlNew.hpp"
+#include "util/array.hpp"
+
+using namespace Dawn;
+
+namespace {
+  /** @return true if c is a space, CR, LF or tab. */
+  bool_t isXmlWhitespace(const char_t c) {
+    return c == ' ' || c == '\r' || c == '\n' || c == '\t';
+  }
+
+  /** @return true if "!--" (the rest of a comment opener) starts at pos. */
+  bool_t isCommentStart(const std::string &data, const size_t pos) {
+    return pos < data.size() && data.compare(pos, 3, "!--") == 0;
+  }
+
+  /** @return true if there is a character at pos and it is '/'. */
+  bool_t isCloseStart(const std::string &data, const size_t pos) {
+    return pos < data.size() && data[pos] == '/';
+  }
+
+  /**
+   * Parses a single element, starting at offset *j of data, into xml. Nested
+   * elements are parsed recursively and appended to xml->children.
+   *
+   * The document is taken by reference so that recursing into a child does
+   * not copy the whole string again for every node.
+   *
+   * @param xml Node to fill in.
+   * @param data XML source text.
+   * @param j In: offset to start reading at. Out: offset just past the '>'
+   *   that closed the element, or where reading stopped if input ran out.
+   */
+  void parseNode(Xml *xml, const std::string &data, size_t *j) {
+    char_t c;
+    XmlParseState doing = XML_PARSE_STATE_DOING_NOTHING;
+    XmlParseState doingBeforeComment = XML_PARSE_STATE_DOING_NOTHING;
+    bool_t insideTag = false;
+    std::string buffer;
+    std::string attrKey;
+    size_t i = *j;
+
+    // Stop at the end of the string or at an embedded NUL. The bounds check
+    // keeps i <= data.size(), so a parent resuming from the offset a child
+    // handed back never indexes past the end of an unterminated document.
+    while(i < data.size() && (c = data[i++]) != '\0') {
+      switch(doing) {
+        case XML_PARSE_STATE_DOING_NOTHING:
+          if(c == '>') continue;
+          if(c == '<') {
+            if(isCommentStart(data, i)) {
+              doingBeforeComment = doing;
+              doing = XML_PARSE_STATE_PARSING_COMMENT;
+              i += 3;
+            } else if(!insideTag) {
+              doing = XML_PARSE_STATE_PARSING_TAG_NAME;
+              insideTag = true;
+            } else if(isCloseStart(data, i)) {
+              // "<tag></tag>": nothing between the tags, so this is our own
+              // closing tag and not the start of a child element.
+              doing = XML_PARSE_STATE_PARSING_CLOSE;
+            } else {
+              // Rewind onto the '<' so the child parses its own opening tag.
+              i -= 1;
+              auto child = new Xml();
+              parseNode(child, data, &i);
+              xml->children.push_back(child);
+              doing = XML_PARSE_STATE_PARSING_CHILD;
+            }
+            continue;
+          }
+
+          if(isXmlWhitespace(c)) continue;
+          doing = XML_PARSE_STATE_PARSING_VALUE;
+          buffer += c;
+          break;
+
+        case XML_PARSE_STATE_PARSING_TAG_NAME:
+          // The name ends at whitespace, '>' or the '/' of a self-closing tag.
+          if(!isXmlWhitespace(c) && c != '>' && c != '/') {
+            buffer += c;
+            break;
+          }
+
+          xml->node = buffer;
+          buffer.clear();
+          if(c == '/') {
+            insideTag = false;
+            doing = XML_PARSE_STATE_PARSING_CLOSE;
+          } else if(c == '>') {
+            doing = XML_PARSE_STATE_DOING_NOTHING;
+          } else {
+            doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE;
+          }
+          break;
+
+        case XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE:
+          // Collect an attribute name until whitespace, '=', '>' or '/'.
+          if(!isXmlWhitespace(c) && c != '>' && c != '/' && c != '=') {
+            buffer += c;
+            break;
+          }
+
+          // Whitespace keeps us in this state, looking for the next name.
+          if(c == '/') {
+            insideTag = false;
+            doing = XML_PARSE_STATE_PARSING_CLOSE;
+          } else if(c == '>') {
+            doing = XML_PARSE_STATE_DOING_NOTHING;
+          } else if(c == '=') {
+            doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE;
+          }
+
+          // Record the name with an empty value; a quoted value that follows
+          // overwrites it.
+          if(!buffer.empty()) {
+            attrKey = buffer;
+            xml->attributes[buffer] = "";
+            buffer.clear();
+          }
+          break;
+
+        case XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE:
+          // Skip ahead to the opening quote mark of the value.
+          if(isXmlWhitespace(c)) continue;
+          if(c == '>' || c == '/') {
+            doing = XML_PARSE_STATE_DOING_NOTHING;
+            insideTag = false;
+            continue;
+          }
+          if(c == '"') doing = XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE;
+          break;
+
+        case XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE:
+          // Everything up to the closing quote mark is the value.
+          if(c == '"') {
+            doing = XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE;
+            xml->attributes[attrKey] = buffer;
+            buffer.clear();
+            continue;
+          }
+          buffer += c;
+          break;
+
+        case XML_PARSE_STATE_PARSING_VALUE:
+          // Text runs until the next '<', which is taken to be the closing
+          // tag; child elements following text are not supported.
+          if(c == '<') {
+            doing = XML_PARSE_STATE_PARSING_CLOSE;
+            xml->value = buffer;
+            buffer.clear();
+            continue;
+          }
+          buffer += c;
+          break;
+
+        case XML_PARSE_STATE_PARSING_CHILD:
+          // Between children: anything that is not a tag is ignored.
+          if(c != '<') break;
+
+          if(isCloseStart(data, i)) {
+            doing = XML_PARSE_STATE_PARSING_CLOSE;
+            continue;
+          }
+
+          if(isCommentStart(data, i)) {
+            doingBeforeComment = doing;
+            doing = XML_PARSE_STATE_PARSING_COMMENT;
+            i += 3;
+            continue;
+          }
+
+          {
+            // Another child; rewind onto its '<' before recursing.
+            i -= 1;
+            auto child = new Xml();
+            parseNode(child, data, &i);
+            xml->children.push_back(child);
+          }
+          break;
+
+        case XML_PARSE_STATE_PARSING_CLOSE:
+          // Skip to the end of the closing tag, then report where we stopped.
+          if(c != '>') continue;
+          *j = i;
+          return;
+
+        case XML_PARSE_STATE_PARSING_COMMENT:
+          // Skip until "-->", then resume the state the comment interrupted.
+          if(c == '-' && data.compare(i, 2, "->") == 0) {
+            i += 2;
+            doing = doingBeforeComment;
+          }
+          break;
+
+        default:
+          break;
+      }
+    }
+
+    *j = i;
+  }
+}
+
+/** Member-function face of the file-local whitespace test. */
+bool_t Xml::isWhitespace(char_t c) {
+  return isXmlWhitespace(c);
+}
+
+/** Parses data from its first character and returns the resulting node. */
+Xml Xml::load(std::string data) {
+  size_t j = 0;
+  Xml xml;
+  parseNode(&xml, data, &j);
+  return xml;
+}
+
+/** Parses one element of data starting at *j; *j receives the stop offset. */
+void Xml::load(Xml *xml, std::string data, size_t *j) {
+  parseNode(xml, data, j);
+}
+
+/** Collects the direct children whose tag name equals type. */
+std::vector<Xml*> Xml::getChildrenOfType(std::string type) {
+  std::vector<Xml*> matches;
+  for(auto child : this->children) {
+    if(child->node == type) matches.push_back(child);
+  }
+  return matches;
+}
+
+/** Finds the first direct child whose tag name equals type, else nullptr. */
+Xml * Xml::getFirstChildOfType(std::string type) {
+  for(auto child : this->children) {
+    if(child->node == type) return child;
+  }
+  return nullptr;
+}
+
+/** Frees the children; each child's destructor frees its own subtree. */
+Xml::~Xml() {
+  for(auto child : this->children) delete child;
+}
\ No newline at end of file
diff --git a/src/dawntools/util/XmlNew.hpp b/src/dawntools/util/XmlNew.hpp
new file mode 100644
index 00000000..3068314c
--- /dev/null
+++ b/src/dawntools/util/XmlNew.hpp
@@ -0,0 +1,87 @@
+// Copyright (c) 2023 Dominic Masters
+//
+// This software is released under the MIT License.
+// https://opensource.org/licenses/MIT
+
+#pragma once
+#include "dawnsharedlibs.hpp"
+
+namespace Dawn {
+  /** States of the character-by-character state machine in Xml::load. */
+  enum XmlParseState {
+    XML_PARSE_STATE_DOING_NOTHING,
+    XML_PARSE_STATE_PARSING_TAG_NAME,
+    XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE,
+    XML_PARSE_STATE_PARSING_ATTRIBUTE_NAME,
+    XML_PARSE_STATE_LOOKING_FOR_ATTRIBUTE_VALUE,
+    XML_PARSE_STATE_PARSING_ATTRIBUTE_VALUE,
+    XML_PARSE_STATE_PARSING_VALUE,
+    XML_PARSE_STATE_PARSING_CHILD,
+    XML_PARSE_STATE_PARSING_CLOSE,
+    XML_PARSE_STATE_PARSING_COMMENT
+  };
+
+  /**
+   * One parsed XML element: tag name, text value, attributes and children.
+   *
+   * Child nodes are deleted by the destructor, so a node owns its subtree.
+   * Copying is disabled because a member-wise copy would leave two nodes
+   * deleting the same children. Move construction hands the subtree over;
+   * move assignment is disabled because the defaulted form would drop the
+   * target's existing children without deleting them.
+   */
+  class Xml {
+    protected:
+      /** @return true if c is a space, CR, LF or tab. */
+      static bool_t isWhitespace(char_t c);
+
+    public:
+      /**
+       * Parses XML source text.
+       *
+       * @param data XML source text.
+       * @return The parsed element together with its subtree.
+       */
+      static Xml load(std::string data);
+
+      /**
+       * Parses an element from data into an existing node.
+       *
+       * @param xml Node to fill in.
+       * @param data XML source text.
+       * @param j In: offset in data to start reading at. Out: offset the
+       *   parser stopped at.
+       */
+      static void load(Xml *xml, std::string data, size_t *j);
+
+      /** Tag name of the element. */
+      std::string node;
+      /** Text content of the element, if any. */
+      std::string value;
+      /** Attribute values keyed by attribute name. */
+      std::map<std::string, std::string> attributes;
+      /** Child elements in document order. */
+      std::vector<Xml*> children;
+
+      /**
+       * @param type Tag name to match.
+       * @return Every direct child whose tag name equals type.
+       */
+      std::vector<Xml*> getChildrenOfType(std::string type);
+
+      /**
+       * @param type Tag name to match.
+       * @return First direct child whose tag name equals type, or nullptr.
+       */
+      Xml * getFirstChildOfType(std::string type);
+
+      Xml() = default;
+      Xml(const Xml &other) = delete;
+      Xml & operator=(const Xml &other) = delete;
+      Xml(Xml &&other) = default;
+      Xml & operator=(Xml &&other) = delete;
+
+      /** Deletes every child node. */
+      ~Xml();
+  };
+}
\ No newline at end of file