Refactor
tools/asset/process/language.py (new file, 193 lines)
@@ -0,0 +1,193 @@
import sys
import os
from tools.asset.args import args
from tools.asset.cache import assetCache, assetGetCache
from tools.asset.path import getAssetRelativePath
from tools.dusk.defs import defs
import polib
import re

LANGUAGE_CHUNK_CHAR_COUNT = int(defs.get('ASSET_LANG_CHUNK_CHAR_COUNT'))
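
# Module-level state shared by processLanguage() and processLanguageList():
# LANGUAGE_DATA maps a language name to its { key: translated string } table,
# and LANGUAGE_KEYS is the ordered list of every key seen; a key's position in
# this list becomes the index written into keys.h and each .dlf file.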
LANGUAGE_DATA = {}
LANGUAGE_KEYS = []

def processLanguageList():
    # Language keys header data
    headerKeys = "// Auto-generated language keys header file.\n"
    headerKeys += "#pragma once\n"
    headerKeys += "#include \"dusk.h\"\n\n"

    # This is the desired chunk group list: if a language key starts with any
    # of the prefixes listed here, we would like to place it in that chunk
    # group. If there is no match, or the chunk is full, the key is added to
    # the next available chunk group (one that isn't a 'desired' group). If the
    # chunk becomes full, then we attempt to make another chunk with the same
    # prefix so that a second batch can occur.
    desiredChunkGroups = {
        'ui': 0
    }
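
    # For example (hypothetical keys): with 'ui': 0, keys such as
    # 'ui.menu.start' and 'ui.hud.score' are grouped into chunk 0, while a key
    # like 'credits.title' matches no prefix and is assigned a chunk later.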

    # Now, for each language key, create the header reference and index.
    keyIndex = 0
    languageKeyIndexes = {}
    languageKeyChunk = {}
    languageKeyChunkIndexes = {}
    languageKeyChunkOffsets = {}
    for key in LANGUAGE_KEYS:
        headerKeys += f"#define {getLanguageVariableName(key)} {keyIndex}\n"
        languageKeyIndexes[key] = keyIndex
        keyIndex += 1

        # Find the desired chunk group for this key.
        assignedChunk = None
        for desiredKey in desiredChunkGroups:
            if key.lower().startswith(desiredKey):
                assignedChunk = desiredChunkGroups[desiredKey]
                break
        # If no desired chunk group matched, assign -1 (resolved later).
        if assignedChunk is None:
            assignedChunk = -1
        languageKeyChunk[key] = assignedChunk

        # Every language must provide a translation for every key.
        for lang in LANGUAGE_DATA:
            if key not in LANGUAGE_DATA[lang]:
                print(f"Error: Missing translation for key '{key}' in language '{lang}'")
                sys.exit(1)

    # Seal the header.
    headerKeys += f"\n#define LANG_KEY_COUNT {len(LANGUAGE_KEYS)}\n"

    # Now we can generate the language string chunks.
    nextChunkIndex = max(desiredChunkGroups.values()) + 1
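    # Generic (non-desired) chunks are numbered after the highest desired chunk
    # index so the two groups never collide.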
    files = []

    for lang in LANGUAGE_DATA:
        langData = LANGUAGE_DATA[lang]

        # Key = chunkIndex, value = chunkInfo
        languageChunks = {}
        for key in LANGUAGE_KEYS:
            keyIndex = languageKeyIndexes[key]
            chunkIndex = languageKeyChunk[key]
            wasSetChunk = chunkIndex != -1
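
            # wasSetChunk tracks whether this key started out in a desired
            # chunk; if that chunk is full the key falls through once to the
            # generic chunks before nextChunkIndex is ever advanced.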
            # This will keep looping until we find a chunk with room.
            while True:
                # Determine the next chunkIndex if chunkIndex is -1.
                if chunkIndex == -1:
                    chunkIndex = nextChunkIndex

                # Is the chunk full? Lengths are measured in UTF-8 bytes so
                # that offsets line up with the packed chunk data written below.
                curLen = languageChunks.get(chunkIndex, {'len': 0})['len']
                newLen = curLen + len(langData[key].encode('utf-8'))
                if newLen > LANGUAGE_CHUNK_CHAR_COUNT:
                    # Chunk is full, need to create a new chunk.
                    chunkIndex = -1
                    if wasSetChunk:
                        wasSetChunk = False
                    else:
                        nextChunkIndex += 1
                    continue

                # Chunk is not full, we can use it.
                if chunkIndex not in languageChunks:
                    languageChunks[chunkIndex] = {
                        'len': 0,
                        'keys': []
                    }
                languageChunks[chunkIndex]['len'] = newLen
                languageChunks[chunkIndex]['keys'].append(key)
                languageKeyChunkIndexes[key] = chunkIndex
                languageKeyChunkOffsets[key] = curLen
                break

        # We have now chunked all the keys for this language!
        langBuffer = b""
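
        # Resulting .dlf layout, as written below: a 3-byte 'DLF' magic, then
        # one record per key (chunkIndex, offset, length, each a little-endian
        # uint32_t), then every chunk's string data padded out to
        # LANGUAGE_CHUNK_CHAR_COUNT bytes.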

        # Write header info
        langBuffer += b'DLF'  # Dusk Language File

        for key in LANGUAGE_KEYS:
            # Write the chunk that this key belongs to as uint32_t
            chunkIndex = languageKeyChunkIndexes[key]
            langBuffer += chunkIndex.to_bytes(4, byteorder='little')

            # Write the offset for this key as uint32_t
            offset = languageKeyChunkOffsets[key]
            langBuffer += offset.to_bytes(4, byteorder='little')

            # Write the length of the string as uint32_t
            strData = langData[key].encode('utf-8')
            langBuffer += len(strData).to_bytes(4, byteorder='little')

        # Now write out each chunk's string data, packed tight and no null term.
        for chunkIndex in sorted(languageChunks.keys()):
            chunkInfo = languageChunks[chunkIndex]
            for key in chunkInfo['keys']:
                strData = langData[key].encode('utf-8')
                langBuffer += strData

            # Now pad the chunk to full size
            curLen = chunkInfo['len']
            if curLen < LANGUAGE_CHUNK_CHAR_COUNT:
                padSize = LANGUAGE_CHUNK_CHAR_COUNT - curLen
                langBuffer += b'\0' * padSize

        # Write out the language data file
        outputFile = os.path.join(args.output_assets, "language", f"{lang}.dlf")
        files.append(outputFile)
        os.makedirs(os.path.dirname(outputFile), exist_ok=True)
        with open(outputFile, "wb") as f:
            f.write(langBuffer)

    # Write out the language keys header file
    outputFile = os.path.join(args.headers_dir, "locale", "language", "keys.h")
    os.makedirs(os.path.dirname(outputFile), exist_ok=True)
    with open(outputFile, "w") as f:
        f.write(headerKeys)

    return {
        'files': files
    }

def getLanguageVariableName(languageKey):
    # Take the language key, prepend LANG_, uppercase it, and replace any
    # non-alphanumeric character with _.
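    # e.g. a hypothetical key 'ui.menu.start' becomes LANG_UI_MENU_START.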
    key = languageKey.strip().upper()
    key = re.sub(r'[^A-Z0-9]', '_', key)
    return f"LANG_{key}"

def processLanguage(asset):
    cache = assetGetCache(asset['path'])
    if cache is not None:
        return cache

    # Load PO File
    po = polib.pofile(asset['path'])

    langName = po.metadata.get('Language')
    if langName not in LANGUAGE_DATA:
        LANGUAGE_DATA[langName] = {}

    for entry in po:
        key = entry.msgid
        val = entry.msgstr

        if key not in LANGUAGE_KEYS:
            LANGUAGE_KEYS.append(key)

        if key not in LANGUAGE_DATA[langName]:
            LANGUAGE_DATA[langName][key] = val
        else:
            print(f"Error: Duplicate translation key '{key}' in language '{langName}'")
            sys.exit(1)
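
    # Note: the .dlf outputs are produced later by processLanguageList(), so
    # this per-asset result reports no files of its own.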

    outLanguageData = {
        'data': po,
        'path': asset['path'],
        'files': []
    }
    return assetCache(asset['path'], outLanguageData)