Language chunking script done (untested)
@@ -6,5 +6,11 @@ msgstr ""
 "Content-Transfer-Encoding: 8bit\n"
 "Plural-Forms: nplurals=2; plural=(n != 1);\n"
 
-msgid "test.test"
+msgid "ui.test"
 msgstr "Hello this is a test."
+
+msgid "map.test"
+msgstr "This is a map test."
+
+msgid "test.test2"
+msgstr "This is another test."
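Note: these test entries line up with the chunk-grouping policy in the script changes below. A minimal sketch of how they would be read (hypothetical; the actual .po loading code is outside this diff, and the filename en.po is assumed):

import polib

po = polib.pofile('en.po')
for entry in po:
    # The dotted prefix ("ui", "map", "test") is what desiredChunkGroups
    # matches against when picking a chunk group for each key.
    prefix = entry.msgid.split('.', 1)[0]
    print(f"{entry.msgid} -> prefix {prefix}")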
@@ -8,11 +8,8 @@
 #pragma once
 #include "dusk.h"
 
-#define LANG_CHUNK_STRING_COUNT 64
-#define LANG_CHUNK_STRING_MAX_LENGTH DUSK_LANG_STRING_MAX_LENGTH
+#define LANG_CHUNK_CHAR_COUNT 6 * 1024 // 6 KB per chunk
 
 #pragma pack(push, 1)
-typedef struct {
-  void *n;
-} languagechunk_t;
+typedef char languagechunkdata_t[LANG_CHUNK_CHAR_COUNT];
 #pragma pack(pop)
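Note: the C side now treats a chunk as a raw fixed-size byte buffer rather than a struct of string slots. A small sketch (an illustration, not code from this commit) mirroring the type in Python so the size can be checked against the script's LANGUAGE_CHUNK_CHAR_COUNT:

import ctypes

LANG_CHUNK_CHAR_COUNT = 6 * 1024  # must match LANGUAGE_CHUNK_CHAR_COUNT below
languagechunkdata_t = ctypes.c_char * LANG_CHUNK_CHAR_COUNT
assert ctypes.sizeof(languagechunkdata_t) == 6 * 1024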
@@ -6,6 +6,8 @@ from assethelpers import getAssetRelativePath
 import polib
 import re
 
+LANGUAGE_CHUNK_CHAR_COUNT = 6 * 1024 # 6 KB per chunk
+
 LANGUAGE_DATA = {}
 LANGUAGE_KEYS = []
 
@@ -18,7 +20,9 @@ def processLanguageList():
     # This is the desired chunk groups list.. if a language key STARTS with any
     # of the keys in this list we would "like to" put it in that chunk group.
     # If there is no match, or the list is full then we will add it to the next
-    # available chunk group (that isn't a 'desired' one).
+    # available chunk group (that isn't a 'desired' one). If the chunk becomes
+    # full, then we attempt to make another chunk with the same prefix so that
+    # a second batching can occur.
     desiredChunkGroups = {
         'ui': 0
     }
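Note: a toy illustration of the policy described in the comments above (not the real implementation; the real assignment loop appears further down in this commit). Keys whose prefix matches a desired group are steered toward that group's chunk; everything else gets -1, meaning "use the next available generic chunk":

desiredChunkGroups = {'ui': 0}

def desiredChunkFor(key):
    # First matching prefix wins; -1 means no preference.
    for prefix, chunk in desiredChunkGroups.items():
        if key.startswith(prefix):
            return chunk
    return -1

for key in ['ui.test', 'map.test', 'test.test2']:
    print(key, desiredChunkFor(key))  # ui.test -> 0, the others -> -1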
@@ -27,6 +31,8 @@ def processLanguageList():
     keyIndex = 0
     languageKeyIndexes = {}
     languageKeyChunk = {}
+    languageKeyChunkIndexes = {}
+    languageKeyChunkOffsets = {}
     for key in LANGUAGE_KEYS:
         headerKeys += f"#define {getLanguageVariableName(key)} {keyIndex}\n"
         languageKeyIndexes[key] = keyIndex
@@ -53,6 +59,7 @@ def processLanguageList():
     headerKeys += f"\n#define LANG_KEY_COUNT {len(LANGUAGE_KEYS)}\n"
 
     # Now we can generate the language string chunks.
+    nextChunkIndex = max(desiredChunkGroups.values()) + 1
     for lang in LANGUAGE_DATA:
         langData = LANGUAGE_DATA[lang]
 
@@ -61,6 +68,70 @@ def processLanguageList():
         for key in LANGUAGE_KEYS:
             keyIndex = languageKeyIndexes[key]
             chunkIndex = languageKeyChunk[key]
+            wasSetChunk = chunkIndex != -1
+
+            # This will keep looping until we find a chunk
+            while True:
+                # Determine the next chunkIndex IF chunkIndex is -1
+                if chunkIndex == -1:
+                    chunkIndex = nextChunkIndex
+
+                # Is the chunk full?
+                curLen = languageChunks.get(chunkIndex, {'len': 0})['len']
+                newLen = curLen + len(langData[key])
+                if newLen > LANGUAGE_CHUNK_CHAR_COUNT:
+                    # Chunk is full, need to create a new chunk.
+                    chunkIndex = -1
+                    if wasSetChunk:
+                        wasSetChunk = False
+                    else:
+                        nextChunkIndex += 1
+                    continue
+
+                # Chunk is not full, we can use it.
+                if chunkIndex not in languageChunks:
+                    languageChunks[chunkIndex] = {
+                        'len': 0,
+                        'keys': []
+                    }
+                languageChunks[chunkIndex]['len'] = newLen
+                languageChunks[chunkIndex]['keys'].append(key)
+                languageKeyChunkIndexes[key] = chunkIndex
+                languageKeyChunkOffsets[key] = curLen
+                break
+
+        # We have now chunked all the keys for this language!
+        langBuffer = b""
+
+        # Write header info
+        langBuffer += b'DLF' # Dusk Language File
+
+        for key in LANGUAGE_KEYS:
+            # Write the chunk that this key belongs to as uint32_t
+            chunkIndex = languageKeyChunkIndexes[key]
+            langBuffer += chunkIndex.to_bytes(4, byteorder='little')
+
+            # Write the offset for this key as uint32_t
+            offset = languageKeyChunkOffsets[key]
+            langBuffer += offset.to_bytes(4, byteorder='little')
+
+            # Write the length of the string as uint32_t
+            strData = langData[key].encode('utf-8')
+            langBuffer += len(strData).to_bytes(4, byteorder='little')
+
+        # Now write out each chunk's string data, packed tight and no null term.
+        for chunkIndex in sorted(languageChunks.keys()):
+            chunkInfo = languageChunks[chunkIndex]
+            for key in chunkInfo['keys']:
+                strData = langData[key].encode('utf-8')
+                langBuffer += strData
+
+        # Write out the language data file
+        outputFile = os.path.join(args.output_assets, "language", f"{lang}.dlf")
+        os.makedirs(os.path.dirname(outputFile), exist_ok=True)
+        with open(outputFile, "wb") as f:
+            f.write(langBuffer)
+
 
     # Write out the language keys header file
     outputFile = os.path.join(args.headers_dir, "locale", "language", "keys.h")
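Note: for reference, a reader sketch for the .dlf layout the loop above emits: the 'DLF' magic, then one (chunkIndex, offset, length) uint32 triple per key, then every chunk's string data packed tight in ascending chunk order. This is an assumption-laden illustration rather than shipped code: keyCount would come from the generated LANG_KEY_COUNT, and it assumes ASCII-only strings, since the writer records offsets in characters but emits UTF-8 bytes (the two only agree for ASCII):

import struct

def read_dlf(path, keyCount):
    with open(path, 'rb') as f:
        data = f.read()
    assert data[:3] == b'DLF'
    entries = []
    pos = 3
    for _ in range(keyCount):
        entries.append(struct.unpack_from('<III', data, pos))  # chunk, offset, length
        pos += 12
    # Chunk boundaries are not stored in the file; recover each chunk's packed
    # size as the furthest byte any of its keys reaches.
    chunkSizes = {}
    for chunk, offset, length in entries:
        chunkSizes[chunk] = max(chunkSizes.get(chunk, 0), offset + length)
    chunkBases = {}
    base = pos
    for chunk in sorted(chunkSizes):
        chunkBases[chunk] = base
        base += chunkSizes[chunk]
    return [data[chunkBases[c] + o:chunkBases[c] + o + l].decode('utf-8')
            for c, o, l in entries]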