CatSystem2 - VN Stats

Invoke it on the scene files, e.g. dump/*.cst. The output needs regex postprocessing, as is common for ripped scripts.
   1 import os, sys, zlib
   2 from struct import unpack
   3 
   4 for fname in sys.argv[1:]:
   5     with open(fname, "rb") as f:
   6         fdata = f.read()
   7         
   8         header = fdata[:8].decode("utf-8")
   9         if header != "CatScene":
  10             continue
  11         
  12         compressed_len = unpack("<I", bytes(fdata[0x08:0x0C]))[0]
  13         decompressed_len = unpack("<I", bytes(fdata[0x0C:0x10]))[0]
  14         
  15         decompressed = zlib.decompress(bytes(fdata[0x10:compressed_len+0x10]))
  16         fdata = decompressed
  17         
  18         flen          = unpack("<I", bytes(fdata[0x00:0x04]))[0]
  19         ranges        = unpack("<I", bytes(fdata[0x04:0x08]))[0]
  20         ranges_bytes  = unpack("<I", bytes(fdata[0x08:0x0C]))[0]
  21         headerlen     = unpack("<I", bytes(fdata[0x0C:0x10]))[0]
  22         
  23         # header structure is something like
  24         # uint32 filelen
  25         # uint32 num_ranges
  26         # uint32 len_ranges_bytes
  27         # uint32 len_header_bytes (excluding these first four words)
  28         
  29         # struct[num_ranges] {uint32, uint32} // where first is length of range, second is index of range
  30         # uint32[entries] // indexed into by ranges
  31         # with this part (the two arrays together) being len_header_bytes bytes long
  32         
  33         # we don't actually need any of that for just script ripping
  34         
  35         code = fdata[headerlen+0x10:]
  36         
  37         i = 0
  38         while len(code[i:]) > 0:
  39             # guess
  40             oplen = code[i]
  41             if oplen != 1:
  42                 print("oplen error")
  43                 exit()
  44             i += 1
  45             
  46             op = code[i]
  47             i += 1
  48             if op in [0x20, 0x21, 0x30]:
  49                 string = []
  50                 char = code[i]
  51                 while char != 0:
  52                     string += [char]
  53                     i += 1
  54                     char = code[i]
  55                 i += 1
  56                 string = bytes(string).decode("cp932")
  57                 
  58                 if op == 0x20:
  59                     print(string.replace("\n", "").replace("\\n",""), end="")
  60             elif op in [0x02]: # linefeed
  61                 print("")
  62                 i += 1
  63             else:
  64                 print(f"unknown op {op:02X} at {i+headerlen+0x10:08X} in {fname}")
  65                 exit()
  66         print("")