Skip to content

Instantly share code, notes, and snippets.

@Yanis002
Last active September 13, 2024 16:46
Show Gist options
  • Select an option

  • Save Yanis002/22b56716e4d6e37c83e035d0bd321a46 to your computer and use it in GitHub Desktop.

Select an option

Save Yanis002/22b56716e4d6e37c83e035d0bd321a46 to your computer and use it in GitHub Desktop.
ST10 string table parser/editor for the Wii Virtual Console emulator
#!/usr/bin/env python3
import hashlib
from pathlib import Path
from dataclasses import dataclass
# Methods of StringTable intended for users of this module:
# - append() to add an element at the end of the table
# - insert() to insert an element at a given index
# - remove() to remove the element at a given index
# - edit() to replace the string of an existing element
# - clear() to reset the table
# - to_bytes() to get the serialized bytes to write to a file
# - from_file() to build a table from an existing file
# the size in bytes of the serialized header (the entry table starts right after it)
HEADER_SIZE = 0x20
# the maximum length of a line vc can draw (used when automatic newlines are requested)
LINE_LENGTH = 60
# the table id for the error messages (always the same value)
TID_ERRORS = 0x2842C987
# the total length of the encoding name field of the header
ENCODING_NAME_LENGTH = 18
# the size in bytes of one serialized entry, shouldn't change
ENTRY_SIZE = 0x10
class String(str):
    """A ``str`` subclass that knows how to serialize itself for the table."""

    def to_bytes(self, encoding: str = "utf-8", add_extras: bool = False, auto_newlines: bool = False):
        """Encode the string, optionally terminated and padded to a multiple of 4.

        Args:
            encoding: name of the codec used to encode the text.
            add_extras: when true, append the end-of-string byte (unless the
                encoded text already contains a 0x00 byte) and then 0xBB
                bytes until the length is a multiple of 4.
            auto_newlines: when true, break the text with a newline every
                LINE_LENGTH characters before encoding it.

        Returns:
            The encoded bytes.
        """
        text = str(self)
        if auto_newlines:
            # cut the text in LINE_LENGTH-sized pieces and glue them with newlines
            pieces = [self[start:start + LINE_LENGTH] for start in range(0, len(self), LINE_LENGTH)]
            text = "\n".join(pieces)

        encoded = text.encode(encoding)
        if not add_extras:
            return encoded

        # end-of-string char, only when there is no 0x00 byte anywhere yet
        if b"\x00" not in encoded:
            encoded += b"\x00"

        # alignment bytes, if necessary
        padding = -len(encoded) % 4
        return encoded + b"\xBB" * padding
@dataclass
class STEntry:
    """One entry of the string table (serialized as big-endian integers)."""

    nStringID: int  # unique identifier, new values are a md5 hash of the string
    nTextOffset1: int  # offset to the string
    nTextOffset2: int  # same as above
    nTextSize1: int  # size of the string (not counting the '\0' char)
    nTextSize2: int  # same as above

    def to_bytes(self):
        """Return the entry as bytes: three 4-byte fields, then two 2-byte fields."""
        # (value, width in bytes), in serialization order
        layout = (
            (self.nStringID, 4),
            (self.nTextOffset1, 4),
            (self.nTextOffset2, 4),
            (self.nTextSize1, 2),
            (self.nTextSize2, 2),
        )
        return b"".join(value.to_bytes(width, byteorder="big") for value, width in layout)
class STHeader:
    """The header of the string table, which also owns the list of entries."""

    def __init__(
        self,
        magic: String = String("ST10"),  # the "version" of the format
        eTableID: int = TID_ERRORS,  # unique identifier of the error table (should stay the same)
        nEntries: int = 0,  # the number of entries of the table
        szEncoding: String = String("utf-8".ljust(ENCODING_NAME_LENGTH, "\x00")),  # the name of the encoding used
        code: String = String("en"),  # the language, the original tool called it "code" according to left-over config files
        nSizeEntry: int = ENTRY_SIZE,  # the size of an entry
        unk1F: int = 0xC0,  # unknown, seems to stay at 0xC0, always
    ):
        self.entries: list[STEntry] = []
        self.magic = magic
        self.eTableID = eTableID
        self.nEntries = nEntries
        self.szEncoding = szEncoding
        self.code = code
        self.nSizeEntry = nSizeEntry
        self.unk1F = unk1F

    def validate(self):
        """Raise ValueError when the header holds no entry."""
        if not self.entries:
            raise ValueError("ERROR: No entries found.")

    def to_bytes(self, encoding: str = "utf-8"):
        """Return the header fields followed by every entry, as bytes.

        Text fields are encoded with ``encoding``; integers are big-endian.
        """
        fields = [
            self.magic.to_bytes(encoding),
            self.eTableID.to_bytes(4, byteorder="big"),
            self.nEntries.to_bytes(2, byteorder="big"),
            self.szEncoding.to_bytes(encoding),
            self.code.to_bytes(encoding),
            self.nSizeEntry.to_bytes(1, byteorder="big"),
            self.unk1F.to_bytes(1, byteorder="big"),
        ]
        # the entry table directly follows the fixed header fields
        fields.extend(entry.to_bytes() for entry in self.entries)
        return b"".join(fields)
class StringTable:
    """Defines the string table, following the ST10 format.

    A serialized table is the header, then one entry per string, then the
    strings themselves (each one terminated and padded by String.to_bytes).
    ``header.entries`` and ``szStrings`` are parallel lists: entry ``i``
    describes string ``i``.
    """

    def __init__(self, strings: list[str | String] | None = None):
        """Create a table, optionally pre-filled with ``strings`` (in order)."""
        self.header: STHeader = STHeader()
        self.szStrings: list[String] = []

        # ``None`` replaces the former mutable default argument
        for string in strings or ():
            self._add(len(self.szStrings), string)

        # compute the layout once, after everything has been added
        self.update()

    def _add(self, index: int, string: str | String):
        """Insert ``string`` and a matching entry at ``index`` without refreshing the layout."""
        if isinstance(string, str):
            string = String(string)

        # offsets are placeholders here, update() fills in the real values
        entry = STEntry(self.get_new_id(string), 0, 0, len(string), len(string))
        self.header.entries.insert(index, entry)
        self.szStrings.insert(index, string)

    def get_entries_offset(self):
        """Return the file offset of the first entry (right after the header)."""
        return HEADER_SIZE

    def get_strings_offset(self):
        """Return the file offset of the first string (right after the entries)."""
        return self.header.nEntries * self.header.nSizeEntry + HEADER_SIZE

    def get_encoding(self):
        """Return the codec used for the strings, chosen from the header's language code."""
        return "shift-jis" if self.header.code == "jp" else "utf-8"

    def get_new_id(self, string: str):
        """Return an ID for ``string``: the md5 of its encoded form, modulo 10**8.

        A warning is printed for each existing entry that already uses that ID.
        """
        digest = hashlib.md5(string.encode(self.get_encoding())).hexdigest()
        new_id = int(digest, 16) % 10**8

        for entry in self.header.entries:
            if entry.nStringID == new_id:
                print(f"WARNING: this ID already exists! ('0x{new_id:08X}')")

        return new_id

    def get_offset(self, prev_offset: int, index: int):
        """Return the offset of the string at ``index``.

        This is the offset of the previous entry plus the serialized size of
        the previous string or, for the first entry, the offset of the start
        of the strings.
        """
        if index > 0:
            return prev_offset + len(self.szStrings[index - 1].to_bytes(self.get_encoding(), True))
        else:
            return self.get_strings_offset()

    def update(self, auto_newlines: bool = False):
        """Recompute the entry count and the offset/size stored in every entry.

        Args:
            auto_newlines: lay the strings out as they are written when
                automatic newlines are enabled, so that the entries describe
                the bytes to_bytes() actually emits.
        """
        encoding = self.get_encoding()
        self.header.nEntries = len(self.header.entries)

        offset = self.get_strings_offset()
        for string, entry in zip(self.szStrings, self.header.entries):
            # NOTE(review): sizes are counted in characters, as before; confirm
            # whether the format expects a byte count for non-ASCII text.
            size = len(string)
            if auto_newlines:
                # wrapping inserts newline characters, count them too
                size = len(string.to_bytes(encoding, False, True).decode(encoding))

            entry.nTextOffset1 = entry.nTextOffset2 = offset
            entry.nTextSize1 = entry.nTextSize2 = size

            # the next string starts after this one, terminator and padding included
            offset += len(string.to_bytes(encoding, True, auto_newlines))

    def append(self, string: str | String):
        """Add ``string`` at the end of the table."""
        self._add(len(self.szStrings), string)
        self.update()

    def insert(self, index: int, string: str | String):
        """Insert ``string`` before position ``index`` (same semantics as list.insert)."""
        self._add(index, string)
        self.update()

    def remove(self, index: int):
        """Remove the string at ``index`` and its entry."""
        self.header.entries.pop(index)
        self.szStrings.pop(index)
        self.update()

    def edit(self, index: int, new_string: str | String):
        """Replace the string at ``index``; the entry keeps its ID."""
        if isinstance(new_string, str):
            new_string = String(new_string)

        self.szStrings[index] = new_string
        self.update()

    def clear(self):
        """Remove every string and entry."""
        self.header.nEntries = 0
        self.header.entries.clear()
        self.szStrings.clear()

    def validate(self):
        """Raise ValueError when the table cannot be serialized."""
        self.header.validate()

        if len(self.szStrings) == 0:
            raise ValueError("ERROR: No strings found.")

        # update() pairs both lists with zip(), a mismatch would be silently truncated
        if len(self.szStrings) != len(self.header.entries):
            raise ValueError("ERROR: The number of strings and entries differ.")

    def to_bytes(self, auto_newlines: bool = False):
        """Return the whole table (header, entries, strings) as bytes.

        Args:
            auto_newlines: break every string with a newline every
                LINE_LENGTH characters.

        Raises:
            ValueError: if the table is empty or inconsistent.
        """
        self.validate()

        # the layout must match the bytes written below, wrapping included
        self.update(auto_newlines)

        encoding = self.get_encoding()
        chunks = [self.header.to_bytes(encoding)]
        chunks.extend(string.to_bytes(encoding, True, auto_newlines) for string in self.szStrings)
        return b"".join(chunks)

    @staticmethod
    def from_file(path: Path):
        """Build a table from an existing ST10 file.

        Strings are read sequentially from the end of the entry table: filler
        bytes (0x00 / 0xBB) between strings are skipped and each string runs
        up to its 0x00 terminator. Entries whose size is 0 yield an empty
        string.

        NOTE(review): the offsets stored in the entries are not used to locate
        the strings, and a string whose first encoded byte is 0xBB would lose
        that byte to the filler skipping; confirm against real files before
        switching to offset-based reading.

        Raises:
            ValueError: if the file is not an ST10 table or is truncated.
        """
        data = Path(path).read_bytes()

        # check the magic on the raw bytes, before decoding anything
        if len(data) < HEADER_SIZE or data[0x00:0x04] != b"ST10":
            raise ValueError("ERROR: This file is not compatible.")

        new_table = StringTable()
        new_table.header = STHeader(
            String(data[0x00:0x04].decode()),
            int.from_bytes(data[0x04:0x08], byteorder="big"),
            int.from_bytes(data[0x08:0x0A], byteorder="big"),
            String(data[0x0A:0x1C].decode()),
            String(data[0x1C:0x1E].decode()),
            data[0x1E],
            data[0x1F],
        )

        # the fields of an entry span ENTRY_SIZE bytes, a smaller stride cannot hold them
        if new_table.header.nSizeEntry < ENTRY_SIZE:
            raise ValueError("ERROR: This file is not compatible.")

        strings_offset = new_table.get_strings_offset()
        if len(data) < strings_offset:
            raise ValueError("ERROR: The entry table is truncated.")

        for i in range(new_table.header.nEntries):
            offset = i * new_table.header.nSizeEntry + new_table.get_entries_offset()
            new_table.header.entries.append(
                STEntry(
                    int.from_bytes(data[offset + 0x00:offset + 0x04], byteorder="big"),
                    int.from_bytes(data[offset + 0x04:offset + 0x08], byteorder="big"),
                    int.from_bytes(data[offset + 0x08:offset + 0x0C], byteorder="big"),
                    int.from_bytes(data[offset + 0x0C:offset + 0x0E], byteorder="big"),
                    int.from_bytes(data[offset + 0x0E:offset + 0x10], byteorder="big"),
                )
            )

        encoding = new_table.get_encoding()
        end_of_data = len(data)
        pos = strings_offset

        for entry in new_table.header.entries:
            if entry.nTextSize1 == 0:
                # its terminator and padding are skipped as filler further down
                new_table.szStrings.append(String(""))
                continue

            # skip terminators of empty strings and alignment bytes
            while pos < end_of_data and data[pos] in (0x00, 0xBB):
                pos += 1

            if pos >= end_of_data:
                raise ValueError("ERROR: The string data is truncated.")

            # a string ends at its 0x00 terminator (or at the end of the file)
            end = data.find(b"\x00", pos)
            if end == -1:
                end = end_of_data

            new_table.szStrings.append(String(data[pos:end].decode(encoding)))
            pos = end + 1

        return new_table
if __name__ == "__main__":
    # example 1: build a table from scratch and write it to disk
    fresh_table = StringTable(["abc", "123", "def", "456"])
    with Path("new_table.bin").resolve().open("wb") as out_file:
        out_file.write(fresh_table.to_bytes())

    # example 2: load an existing table, change its second string and
    # write the result (with automatic newlines) to a new file
    source_path = Path("Errors_VC64ErrorStrings_en.bin").resolve()
    error_strings = StringTable.from_file(source_path)
    error_strings.edit(1, "Hello World!")
    with Path("NEW_Errors_VC64ErrorStrings_en.bin").resolve().open("wb") as out_file:
        out_file.write(error_strings.to_bytes(True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment