chore: correct dlc loading 'regex' + add extra helper tools to the tools folder

msscmp_extract was written by me; I don't know where I got the other two from.
This commit is contained in:
Fireblade 2026-05-20 15:52:52 -04:00
parent 9dd13ac289
commit 314a453996
4 changed files with 752 additions and 2 deletions

View file

@ -10,6 +10,42 @@
#include "Common/UI/UI.h"
#include "lce_filesystem/FolderFile.h"
// Returns true when ch is one of the characters L'0' through L'9'.
// This is a plain range comparison; no locale lookup is involved.
static bool isDigitW(wchar_t ch)
{
    if(ch < L'0' || ch > L'9')
    {
        return false;
    }
    return true;
}
// Checks whether a file name has the form "x<digits>data.pck" (for example
// "x16data.pck"): a leading 'x', one or more digits, then the literal suffix
// "data.pck". No case folding happens here, so the caller must pass a name
// that is already lower-cased.
static bool isDataPackPckName(const wstring &baseNameLower)
{
    const wstring suffix = L"data.pck";
    const size_t nameLength = baseNameLower.size();
    const size_t suffixLength = suffix.size();

    // Need room for the 'x', at least one digit and the whole suffix.
    if(nameLength < suffixLength + 2 || baseNameLower[0] != L'x')
    {
        return false;
    }

    const size_t suffixStart = nameLength - suffixLength;
    if(baseNameLower.compare(suffixStart, suffixLength, suffix) != 0)
    {
        return false;
    }

    // Every character between the leading 'x' and the suffix must be a digit.
    size_t pos = 1;
    while(pos < suffixStart && isDigitW(baseNameLower[pos]))
    {
        ++pos;
    }
    return pos == suffixStart;
}
static bool hasPckFolderFallback(const wstring &path, wstring &folderPath)
{
wstring lowerPath = toLower(path);
@ -23,8 +59,10 @@ static bool hasPckFolderFallback(const wstring &path, wstring &folderPath)
return false;
}
if(!(lowerPath.find(L"x16Data.pck") != wstring::npos
|| lowerPath.find(L"x32Data.pck") != wstring::npos))
const size_t nameStart = lowerPath.find_last_of(L"/\\");
const size_t baseOffset = (nameStart == wstring::npos) ? 0 : (nameStart + 1);
wstring baseNameLower = lowerPath.substr(baseOffset);
if(!isDataPackPckName(baseNameLower))
{
return false;
}

286
tools/msscmp_extract.py Normal file
View file

@ -0,0 +1,286 @@
#!/usr/bin/env python3
import os
import sys
import struct
import subprocess
from collections import defaultdict
# helper functions
def read_c_string(data, offset):
    """Return the NUL-terminated string that starts at ``offset`` in ``data``.

    The bytes from ``offset`` up to (not including) the next NUL byte are
    decoded as UTF-8, silently dropping undecodable bytes. An empty string
    is returned when no NUL byte is found from ``offset`` onward.
    """
    terminator = data.find(b"\x00", offset)
    if terminator != -1:
        raw = data[offset:terminator]
        return raw.decode("utf-8", errors="ignore")
    return ""
def convert_to_flac(infile, outfile):
    """Transcode ``infile`` to FLAC at ``outfile`` using the ``ffmpeg`` CLI.

    Nothing is done when ``outfile`` already exists. Failures (ffmpeg not
    installed, not runnable, or exiting non-zero) are reported on stdout and
    never raised, so one bad file does not abort a batch run.
    """
    # skip if already exists
    if os.path.exists(outfile):
        return

    command = ["ffmpeg", "-y", "-i", infile, "-c:a", "flac", outfile]
    try:
        subprocess.run(
            command,
            # Keep ffmpeg away from the caller's stdin.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        print(f"[FAIL] ffmpeg failed on {infile}")
        # Remove any partial output so the exists-check above does not treat
        # a broken file as finished on the next run.
        try:
            os.remove(outfile)
        except OSError:
            pass
        return

    print(f"[FLAC] {outfile}")
def _safe_parts(path):
    """Split a '/'-separated archive path, dropping '', '.' and '..' parts."""
    return [part for part in path.split("/") if part not in ("", ".", "..")]


def _parse_entries(data, table_offset, entry_count):
    """Collect the extractable entries listed in the file table.

    Returns ``(entries, folder_counts)``. ``entries`` is a list of
    ``(folder, name, data_off, size, sample_rate)`` tuples and
    ``folder_counts`` maps each folder to the number of entries it holds.
    Entries whose offsets or payload fall outside ``data`` are skipped.
    """
    filesize = len(data)
    folder_counts = defaultdict(int)
    entries = []

    for index in range(entry_count):
        entry_off = table_offset + index * 8
        if entry_off + 8 > filesize:
            break

        try:
            folder_off, info_off = struct.unpack_from(">II", data, entry_off)
            if folder_off >= filesize or info_off >= filesize:
                continue

            # yoink audio name from parent dir
            folder = read_c_string(data, folder_off)

            (filename_rel,) = struct.unpack_from(">I", data, info_off + 4)
            filename_off = info_off + filename_rel
            if filename_off >= filesize:
                continue
            filename = read_c_string(data, filename_off)

            # NOTE(review): this is the only field read as little-endian;
            # kept as-is, confirm against the format.
            (data_off,) = struct.unpack_from("<I", data, info_off + 8)
            sample_rate, size = struct.unpack_from(">II", data, info_off + 20)
        except struct.error:
            # The info record runs past the end of the file.
            continue

        if size <= 0:
            continue
        if data_off + size > filesize:
            continue

        # Names come from the archive itself: drop '.'/'..' components and
        # any directory part of the file name so output stays under the
        # extraction roots.
        clean_folder = "/".join(_safe_parts(folder.replace("\\", "/")))
        clean_name = (
            filename.replace("*", "")
            .replace("\\", "/")
            .rsplit("/", 1)[-1]
            .strip()
        )
        if not clean_name.endswith(".binka"):
            clean_name += ".binka"

        folder_counts[clean_folder] += 1
        entries.append((clean_folder, clean_name, data_off, size, sample_rate))

    return entries, folder_counts


def _flac_destination(flac_root, clean_folder, clean_name, single_entry):
    """Return ``(directory, filename)`` for an entry's FLAC output."""
    if single_entry and clean_folder:
        # folders with one sound get deleted: the file takes the folder name
        *parents, leaf = clean_folder.split("/")
        return os.path.join(flac_root, *parents), leaf + ".flac"
    stem = os.path.splitext(clean_name)[0]
    return os.path.join(flac_root, clean_folder), stem + ".flac"


def main():
    """Extract every sound from the BANK file named on the command line.

    Raw payloads are written below ``extracted_binka`` and each one is then
    handed to ``convert_to_flac`` with a target below ``extracted_flac``.
    Problems with the input are reported on stdout and end the run early.
    """
    if len(sys.argv) < 2:
        print("usage: python3 msscmp_extract.py Minecraft.msscmp")
        return

    infile = sys.argv[1]
    with open(infile, "rb") as f:
        data = f.read()

    # validation
    if data[:4] != b'BANK':
        print("Not a BANK file")
        return

    filesize = len(data)
    # The last header field read below ends at 0x38.
    if filesize < 0x38:
        print("Truncated BANK header")
        return

    # header recognition
    (file_table_offset,) = struct.unpack_from(">I", data, 0x18)
    (entry_count,) = struct.unpack_from(">I", data, 0x34)
    print(f"[+] table @ {hex(file_table_offset)}")
    print(f"[+] entries: {entry_count}")

    if file_table_offset >= filesize:
        print("Bad table offset")
        return

    # binka and flac output folder(s)
    binka_root = "extracted_binka"
    flac_root = "extracted_flac"
    os.makedirs(binka_root, exist_ok=True)
    os.makedirs(flac_root, exist_ok=True)

    entries, folder_counts = _parse_entries(
        data, file_table_offset, entry_count
    )

    # extract + convert to flac
    extracted = 0
    for clean_folder, clean_name, data_off, size, sample_rate in entries:
        binka_folder = os.path.join(binka_root, clean_folder)
        binka_path = os.path.join(binka_folder, clean_name)
        flac_folder, flac_filename = _flac_destination(
            flac_root,
            clean_folder,
            clean_name,
            folder_counts[clean_folder] == 1,
        )

        try:
            os.makedirs(binka_folder, exist_ok=True)
            with open(binka_path, "wb") as out:
                out.write(data[data_off:data_off + size])
            os.makedirs(flac_folder, exist_ok=True)
        except OSError as exc:
            # Report and move on instead of silently dropping the entry.
            print(f"[FAIL] could not write {binka_path}: {exc}")
            continue

        convert_to_flac(binka_path, os.path.join(flac_folder, flac_filename))
        print(
            f"[+] {clean_name} "
            f"({size} bytes @ {sample_rate}hz)"
        )
        extracted += 1

    print(f"\nDone. Extracted {extracted} files.")
# Run the extractor only when this file is executed as a script.
if __name__ == "__main__":
    main()

232
tools/pck_extract.py Normal file
View file

@ -0,0 +1,232 @@
#!/usr/bin/env python3
import struct
import argparse
import zipfile
import os
import sys
# Default byte order, used as the struct format prefix (">" = big-endian).
# detect_endianness() overwrites this after inspecting the input file.
ENDIAN = ">"
def detect_endianness(f):
    """
    Detect whether the PCK uses big-endian or little-endian.

    Reads four bytes at the current position of ``f``, interprets them as a
    uint32 in both byte orders and stores the matching struct prefix
    (">" or "<") in the module-level ``ENDIAN``. The stream position is
    restored before returning and before raising.

    Raises:
        EOFError: fewer than four bytes are available.
        ValueError: neither byte order yields a value in 1..99.
    """
    global ENDIAN
    pos = f.tell()
    raw = f.read(4)
    # Rewind right away so the position is restored on the error paths too.
    f.seek(pos)
    if len(raw) != 4:
        raise EOFError("File too small")
    be = struct.unpack(">I", raw)[0]
    le = struct.unpack("<I", raw)[0]
    # detect endianness based off of version numbers [usually 3 or 4]
    if 0 < be < 100:
        ENDIAN = ">"
    elif 0 < le < 100:
        ENDIAN = "<"
    else:
        raise ValueError("Could not determine endianness")
    print(f"Detected {'Big' if ENDIAN == '>' else 'Little'} Endian")
def read_u32(f):
    """Read one unsigned 32-bit integer from ``f`` in ``ENDIAN`` byte order.

    Raises EOFError when fewer than four bytes can be read.
    """
    chunk = f.read(4)
    if len(chunk) == 4:
        (value,) = struct.unpack(ENDIAN + "I", chunk)
        return value
    raise EOFError("Unexpected EOF while reading uint32")
def read_utf16_string(f):
    """
    Read one length-prefixed UTF-16 string from ``f``.

    Layout read from the stream:
    uint32 length (counted in 2-byte units)
    UTF-16 bytes
    uint32 padding

    Raises ValueError for a length above 100000 and EOFError when the
    stream ends inside the string or its padding.
    """
    unit_count = read_u32(f)
    if unit_count > 100000:
        raise ValueError(f"Unreasonable string length: {unit_count}")

    byte_count = unit_count * 2
    payload = f.read(byte_count)
    if len(payload) != byte_count:
        raise EOFError("Unexpected EOF while reading string")

    # skip padding
    if len(f.read(4)) != 4:
        raise EOFError("Unexpected EOF while reading string padding")

    codec = "utf-16-le" if ENDIAN != ">" else "utf-16-be"
    return payload.decode(codec, errors="replace")
def extract_pck_to_zip(input_file, output_zip):
    """Parse the PCK at ``input_file`` and store its assets in ``output_zip``.

    The whole file is parsed first (header, parameter lookup table, optional
    XML version, asset table, then per-asset parameters and data), and only
    afterwards is the ZIP written. Per-asset parameters are parsed but not
    stored in the archive. Backslashes in asset names become forward slashes.

    Raises ValueError or EOFError when the file does not follow that layout.
    """
    with open(input_file, "rb") as pck:
        # detect endianness before reading anything
        detect_endianness(pck)

        # ----- HEADER -----
        pck_type = read_u32(pck)
        param_count = read_u32(pck)
        print(f"PCK Type: {pck_type}")
        print(f"Parameter Count: {param_count}")

        # ----- PARAMETER LOOKUP TABLE -----
        lookup = [None] * param_count
        for _ in range(param_count):
            slot = read_u32(pck)
            label = read_utf16_string(pck)
            if slot >= param_count:
                raise ValueError(f"Invalid parameter index: {slot}")
            lookup[slot] = label

        # Optional XMLVERSION field
        if "XMLVERSION" in lookup:
            xml_version = read_u32(pck)
            print(f"XML Version: {xml_version}")

        # ----- ASSET TABLE -----
        asset_count = read_u32(pck)
        print(f"Asset Count: {asset_count}")

        assets = []
        for number in range(1, asset_count + 1):
            size = read_u32(pck)
            asset_type = read_u32(pck)
            name = read_utf16_string(pck).replace("\\", "/")
            print(f"[{number}/{asset_count}] {name} ({size} bytes)")
            assets.append({"name": name, "size": size, "type": asset_type})

        # ----- ASSET DATA -----
        for asset in assets:
            params = {}
            for _ in range(read_u32(pck)):
                key_index = read_u32(pck)
                value = read_utf16_string(pck)
                # Indices outside the lookup table are ignored.
                if key_index < len(lookup):
                    params[lookup[key_index]] = value
            asset["params"] = params

            expected = asset["size"]
            payload = pck.read(expected)
            if len(payload) != expected:
                raise EOFError(
                    f"Unexpected EOF while reading asset data: {asset['name']}"
                )
            asset["data"] = payload

    # ----- WRITE ZIP -----
    print(f"\nWriting ZIP: {output_zip}")
    with zipfile.ZipFile(
        output_zip, "w", compression=zipfile.ZIP_DEFLATED
    ) as archive:
        for asset in assets:
            member = asset["name"].lstrip("/")
            # Skip entries whose name is empty once leading slashes are gone.
            if member:
                print(f"Adding: {member}")
                archive.writestr(member, asset["data"])
    print("\nDone!")
def main():
    """Command-line entry point: convert one .pck file into a .zip archive.

    Exits with status 1 when the input file is missing or extraction fails.
    """
    cli = argparse.ArgumentParser(
        description="Convert Minecraft Legacy Console .pck files to .zip"
    )
    cli.add_argument("input", help="Input .pck file")
    cli.add_argument("-o", "--output", help="Output zip filename")
    options = cli.parse_args()

    source = options.input
    if not os.path.isfile(source):
        print(f"Input file not found: {source}")
        sys.exit(1)

    # Without -o, reuse the input path with its extension swapped for ".zip".
    target = options.output or os.path.splitext(source)[0] + ".zip"

    try:
        extract_pck_to_zip(source, target)
    except Exception as e:
        print(f"\nERROR: {e}")
        sys.exit(1)
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()

194
tools/pck_pack.py Normal file
View file

@ -0,0 +1,194 @@
#!/usr/bin/env python3
import struct
import argparse
import os
import sys
# Default byte order, used as the struct format prefix (">" = big-endian).
# main() switches it to "<" when --little is passed.
ENDIAN = ">"
# Fixed values written to every packed file: the first header field, the
# value stored for XMLVERSION, and the type recorded for each asset.
DEFAULT_PCK_TYPE = 3
DEFAULT_XML_VERSION = 4
DEFAULT_ASSET_TYPE = 0
def write_u32(f, value):
    """Write ``value`` to ``f`` as an unsigned 32-bit integer in ``ENDIAN`` order."""
    packed = struct.pack(ENDIAN + "I", value)
    f.write(packed)
def write_utf16_string(f, text):
    """
    Write one length-prefixed UTF-16 string to ``f``.

    PCK strings:
    uint32 length (number of 2-byte UTF-16 code units)
    UTF-16 bytes
    uint32 padding
    """
    encoding = "utf-16-be" if ENDIAN == ">" else "utf-16-le"
    encoded = text.encode(encoding)
    # Count UTF-16 code units, not Python characters: a character outside the
    # Basic Multilingual Plane encodes to two units (four bytes), so
    # len(text) would understate how many bytes follow the prefix.
    write_u32(f, len(encoded) // 2)
    f.write(encoded)
    # padding
    write_u32(f, 0)
def collect_folder_assets(folder_path):
    """Read every file under ``folder_path`` into a list of asset records.

    Each record is a dict with ``name`` (path relative to ``folder_path``,
    backslash-separated), ``size``, ``type``, ``params`` and ``data``.
    Directories and files are visited in sorted order, so packing the same
    folder twice yields the same asset order.
    """
    assets = []
    for root, dirs, files in os.walk(folder_path):
        # os.walk yields names in arbitrary order; sorting dirs in place
        # steers the traversal and keeps the output reproducible.
        dirs.sort()
        for file_name in sorted(files):
            full_path = os.path.join(root, file_name)
            rel_path = os.path.relpath(full_path, folder_path)
            # PCKs usually use backslashes
            rel_path = rel_path.replace("/", "\\")
            rel_path = rel_path.replace("\\\\", "\\")
            with open(full_path, "rb") as f:
                data = f.read()
            assets.append({
                "name": rel_path,
                "size": len(data),
                "type": DEFAULT_ASSET_TYPE,
                "params": {},
                "data": data,
            })
    return assets
def pack_folder_to_pck(input_folder, output_pck):
    """Write every file found under ``input_folder`` into a new PCK file.

    Sections are written in this order: header, parameter lookup table,
    XML version, asset table, then each asset's parameters and data.

    Raises ValueError when the folder contains no files.
    """
    entries = collect_folder_assets(input_folder)
    if not entries:
        raise ValueError("No files found in folder")

    # parameter lookup table
    param_names = ["PATH", "TYPE", "XMLVERSION"]

    print(f"Assets: {len(entries)}")
    with open(output_pck, "wb") as out:
        # ----- HEADER -----
        write_u32(out, DEFAULT_PCK_TYPE)
        write_u32(out, len(param_names))

        # ----- PARAMETER LOOKUP TABLE -----
        for position, name in enumerate(param_names):
            write_u32(out, position)
            write_utf16_string(out, name)

        # optional XMLVERSION
        if "XMLVERSION" in param_names:
            write_u32(out, DEFAULT_XML_VERSION)

        # ----- ASSET TABLE -----
        write_u32(out, len(entries))
        for entry in entries:
            write_u32(out, entry["size"])
            write_u32(out, entry["type"])
            write_utf16_string(out, entry["name"])
            print(f"Indexing: {entry['name']} ({entry['size']} bytes)")

        # ----- ASSET DATA -----
        for entry in entries:
            entry_params = entry["params"]
            write_u32(out, len(entry_params))
            for name, text in entry_params.items():
                # A name missing from the lookup table raises ValueError here.
                write_u32(out, param_names.index(name))
                write_utf16_string(out, text)
            out.write(entry["data"])
            print(f"Writing: {entry['name']}")

    print(f"\nDone! Wrote: {output_pck}")
def main():
    """Command-line entry point: pack a folder into a .pck file.

    ``--little`` switches the module-level ``ENDIAN`` to little-endian before
    packing. Exits with status 1 when the input folder is missing or packing
    fails.
    """
    global ENDIAN

    cli = argparse.ArgumentParser(
        description="Pack a folder into a Minecraft Legacy Console .pck"
    )
    cli.add_argument("input", help="Input folder")
    cli.add_argument("-o", "--output", help="Output .pck filename")
    cli.add_argument(
        "--little",
        action="store_true",
        help="Write little-endian PCK",
    )
    options = cli.parse_args()

    source = options.input
    if not os.path.isdir(source):
        print(f"Input folder not found: {source}")
        sys.exit(1)

    if options.little:
        ENDIAN = "<"

    target = options.output
    if not target:
        # Default to "<folder name>.pck" in the current directory.
        folder_name = os.path.basename(os.path.normpath(source))
        target = folder_name + ".pck"

    try:
        pack_folder_to_pck(source, target)
    except Exception as e:
        print(f"\nERROR: {e}")
        sys.exit(1)
# Run the packer only when this file is executed as a script.
if __name__ == "__main__":
    main()