lint: Check utf16 strings (#449)

This commit is contained in:
Narr the Reg 2025-03-18 17:15:50 -06:00 committed by GitHub
parent 497ff11434
commit 7b1df2234f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 108 additions and 22 deletions

View file

@ -21760,3 +21760,51 @@
0x710187fb58,"NoName"
0x710187fb60,"17AmiiboNpcDirector"
0x710187fb74,"N2al9ISceneObjE"
0x71018acfbc,u"NULL"
0x71018acfc8,u"%02d"
0x71018acfd2,u"?????"
0x71018acfde,u"%d"
0x71018acfe4,u"1"
0x71018acfe8,u"0"
0x71018acfec,u"%d位のひと"
0x71018acffa,u"%ls"
0x71018ad002,u"%u"
0x71018ad008,u"%03d"
0x71018ad012,u"\n"
0x71018ad016,u"%s"
0x71018ad01c,u"-----"
0x71018ad028,u"%d/%d"
0x71018ad034,u"??????"
0x71018ad042,u"----"
0x71018ad04c,u"@"
0x71018ad050,u"テストステージ"
0x71018ad060,u"---"
0x71018ad068,u"/////////////////"
0x71018ad08c,u"未設定"
0x71018ad094,u"66666"
0x71018ad0a0,u"56666"
0x71018ad0ac,u"55666"
0x71018ad0b8,u"55566"
0x71018ad0c4,u"55556"
0x71018ad0d0,u"55555"
0x71018ad0dc,u"%lld"
0x71018ad0e6,u"%.1f"
0x71018ad0f0,u"★30"
0x71018ad0f8,u"* +1"
0x71018ad102,u"None"
0x71018ad10c,u"<シャイン名>"
0x71018ad11c,u"@title"
0x71018ad12a,u"@space"
0x71018ad138,u"5"
0x71018ad13c,u"6"
0x71018ad140,u"シャイン名未設定"
0x71018ad152,u"メッセージ初期化失敗"
0x71018ad168,u"%01d"
0x71018ad172,u"%04d"
0x71018ad17c,u"%05d"
0x71018ad186,u"%06d"
0x71018ad190,u"/"
0x71018ad194,u"%2d"
0x71018ad19c,u"%3d"
0x71018ad1a4,u"%4d"
0x71018ad1ac,u"%5d"

Can't render this file because it is too large.

View file

@ -291,9 +291,12 @@ def common_string_finder(c, path):
if "//" in line:
continue
matches = re.findall(r'"(.*?)"', line)
matches = re.findall(r'(u?".*?")', line)
for match in matches:
if not match.startswith("u"):
# Remove quotes from utf8 strings
match = match[1:-1]
if len(match) < 2:
continue
found = False

View file

@ -10,21 +10,22 @@ MAX_TEXT_SIZE = 0x600
CHUNK_SIZE = 0x200 # Read file chunk size
NSO_OFFSET = 0x70FFFFFF00
START_OFFSET = 0x710181c3d8 # String table start main 1.0.0
END_OFFSET = 0x710187fb74 # String table end
U8_START_OFFSET = 0x710181c3d8 # UTF8 String table start main 1.0.0
U8_END_OFFSET = 0x710187fb74 # UTF8 String table end
U16_START_OFFSET = 0x71018acfbc # UTF16 string table start main 1.0.0
U16_END_OFFSET = 0x71018ad1b3 # UTF16 string table end
def parse_string(buffer, offset, csv_file):
def parse_string(buffer, offset, csv_file, encoding):
nso_addr = NSO_OFFSET + offset
if len (buffer) < MIN_TEXT_SIZE:
return
if len(buffer) > MAX_TEXT_SIZE:
print("Warning: String is bigger than buffer size")
print(buffer)
print(hex(nso_addr), "Warning: String is bigger than buffer size")
return
nso_addr = NSO_OFFSET + offset
try :
text = buffer.decode('utf-8')
text = buffer.decode(encoding)
# Make escape sequences visible
text = text.replace("\\", "\\\\")
@ -35,28 +36,30 @@ def parse_string(buffer, offset, csv_file):
# Write to csv file
csv_file.write(hex(nso_addr))
csv_file.write(",\"")
if encoding == 'utf-16':
csv_file.write(",u\"")
else:
csv_file.write(",\"")
csv_file.write(text)
csv_file.write("\"\n")
except:
#Decode to utf-8 can fail on non-string data
print("Unable to parse string")
print(buffer)
# Decode can fail on non-string data
print(hex(nso_addr), "Unable to parse string")
def parse_nso(csv_file, nso_file):
offset = START_OFFSET - NSO_OFFSET
end = END_OFFSET - NSO_OFFSET
def parse_utf8(csv_file, nso_file):
offset = U8_START_OFFSET - NSO_OFFSET
end = U8_END_OFFSET - NSO_OFFSET
nso_file.seek(offset)
buffer = bytes()
while nso_file.tell() < end:
chunk = nso_file.read(CHUNK_SIZE)
if not chunk:
return
buffer += chunk
buffer += chunk
previous_zero = 0
last_zero = -1
for i, b in enumerate(buffer):
if b != 0:
continue
@ -65,14 +68,46 @@ def parse_nso(csv_file, nso_file):
previous_zero = last_zero
last_zero = i
text = buffer[previous_zero + 1 : last_zero]
parse_string(text, offset, csv_file)
parse_string(text, offset, csv_file, 'utf-8')
offset += last_zero - previous_zero
buffer = buffer[last_zero + 1 : len(buffer)]
def parse_utf16(csv_file, nso_file):
    """Scan the UTF-16 string table of nso_file and hand each string to parse_string.

    The file is read in CHUNK_SIZE pieces, starting at
    U16_START_OFFSET - NSO_OFFSET, until the file position reaches
    U16_END_OFFSET - NSO_OFFSET. Every run of bytes that ends in a
    two-byte zero code unit is passed to parse_string together with the
    file offset at which the run starts and the 'utf-16' encoding.

    Returns early when the file runs out of data, or when a terminator is
    found after the running offset has moved past the end of the table.
    """
    offset = U16_START_OFFSET - NSO_OFFSET
    end = U16_END_OFFSET - NSO_OFFSET
    nso_file.seek(offset)

    pending = bytes()
    while nso_file.tell() < end:
        chunk = nso_file.read(CHUNK_SIZE)
        if not chunk:
            return
        pending += chunk

        # Index of the second byte of the most recent terminator in
        # `pending`; -1 while no terminator has been seen in this pass.
        terminator = -1
        # Code units are two bytes wide, so a terminator can only be
        # completed on an odd index (the second byte of a pair).
        for second in range(1, len(pending), 2):
            if pending[second - 1] != 0 or pending[second] != 0:
                continue
            if offset > end:
                return
            start = terminator + 1
            terminator = second
            # Slice stops before the first byte of the terminator pair.
            text = pending[start : terminator - 1]
            parse_string(text, offset, csv_file, 'utf-16')
            # Advance past the string and its two terminator bytes.
            offset += terminator + 1 - start
        # Keep whatever follows the last terminator for the next chunk.
        pending = pending[terminator + 1 :]
def create_string_table(string_path, nso_path):
    """Write the string table CSV at string_path from the NSO at nso_path.

    The NSO is opened in binary mode and scanned twice: first by
    parse_utf8, then by parse_utf16. Both write their rows to the same
    CSV file, in that order.

    Args:
        string_path: Path of the CSV file to create (overwritten if present).
        nso_path: Path of the NSO binary to read the strings from.
    """
    # The extracted strings include non-ASCII text, and the default text
    # encoding of open() depends on the locale. Pin UTF-8 so writing a row
    # cannot fail part-way through on a non-UTF-8 system.
    with open(string_path, "w", encoding="utf-8") as csv_file, \
            open(nso_path, "rb") as nso_file:
        parse_utf8(csv_file, nso_file)
        parse_utf16(csv_file, nso_file)
project_root = setup.ROOT