.sob files, 32-bit word bytecode packed with a lookup table in the header and a string table at the end. I have NO IDEA what the VN engine actually does to execute its logic, only how to get the text out of the scripts, and I'm not 100% certain it's correct, it just seems to work properly. Oretsuba Prelude and Oretsuba seem to trigger dialogue text with different commands (0x000001C8 vs 0x000001CF). I'm not certain that this is correct, but it seems to work. Useful regexes: {{{#!highlight ^#[^&]*& \$$ <[-0-9]*>(\\)? }}} Dumper: {{{#!highlight c++ #include #include #include #include #include #include #include std::vector> maps; std::map examples; //deduplicate identical strings within each scene even if they have different addresses bool deduplciate = true; std::set seen; int main(int argc, char ** argv) { int strings = 0; int totalspace = 0; for(int i = 0; i < argc; i++) { maps.clear(); seen.clear(); auto f = fopen(argv[i], "rb"); if(!f) continue; char magic[4]; int n = fread(magic, 1, 4, f); if(n != 4 or strncmp(magic, "SOB0", 4) != 0) continue; uint32_t table_size; fread(&table_size, 4, 1, f); uint32_t table_end = table_size + 0x8; int header_count = 0; //int j = 0; while(ftell(f) < table_end) { //printf("Header %d at %08X:\n", j++, ftell(f)); uint32_t word_pairs; fread(&word_pairs, 4, 1, f); std::map newmap; for(int i = 0; i < word_pairs; i++) { uint32_t key; uint32_t value; fread(&key, 4, 1, f); fread(&value, 4, 1, f); newmap[key] = value; //printf("%08X: %08X\n", key, value); } maps.push_back(newmap); header_count += 1; } if(maps.size() < 5) { puts("not enough maps"); exit(0); } fseek(f, table_end, SEEK_SET); uint32_t strings_start; uint32_t strings_end; fread(&strings_start, 4, 1, f); fread(&strings_end, 4, 1, f); totalspace += strings_end-strings_start; uint32_t header_end = ftell(f); uint32_t code_end = strings_start+header_end; //printf("String range: %08X~%08X\n", strings_start, strings_end); // can't tell if text is content text until a command tries to use it? I think? std::string text_memory; uint32_t text_memory_addr; bool justdidtext = false; // no idea if commands have proper inline arguments in this bytecode lol while(ftell(f) < code_end) { uint32_t address = ftell(f)-header_end; uint32_t command; fread(&command, 4, 1, f); if((command & 0xFFF00000) == 0x01800000 and maps[4].count(address) and maps[4][address] >= strings_start and maps[4][address] < strings_end) { //printf("Found command that uses string: "); //printf("%08X: %08X (%08X) / ", address, command, maps[4][address], argv[i]); //printf("|%08X|:(|%08X|) [%s]\n", address+header_end, maps[4][address]+header_end, argv[i]); strings++; fseek(f, maps[4][address]+header_end, SEEK_SET); int c = fgetc(f); if(c == 0) // 00 XX XX fseek(f, -1, SEEK_CUR); auto start = ftell(f); while((c = fgetc(f)) != 0); auto end = ftell(f); char * text = (char *)malloc(end-start); fseek(f, start, SEEK_SET); fread(text, 1, end-start, f); std::string str = std::string(text); if(!examples.count(command)) examples[command] = str; //if(str != "") // printf("%08X text %s\n", command, text); //printf("%s\n", text); //printf("%08X: %08X %s\n", address+header_end, command, text); text_memory = str; text_memory_addr = maps[4][address]; free(text); justdidtext = true; } else { // I have NO CERTAINTY that this is correct if((command == 0x000001C8 or command == 0x000001CF) and text_memory != "" and (!deduplciate or !seen.count(text_memory_addr))) { puts(text_memory.data()); if(deduplciate) seen.insert(text_memory_addr); } justdidtext = false; } fseek(f, address+header_end+4, SEEK_SET); } fclose(f); } //printf("Total space for strings: %d\n", totalspace); //printf("Total strings: %d\n", strings); //for(auto &[k, v] : examples) //{ // printf("%08X: %s\n", k, v.data()); //} } }}} Oretsuba Afterstory is only vaguely similar. It uses .tob files, some kind of braindamaged bytecode with inconsistent internal length handling, and badly terminated strings. {{{#!highlight c++ #include #include #include #include #include #include std::map map; // resume table? std::set set; // resume table? bool is_upper_jis_surrogate(int c) { return (c >= 0x80 and c <= 0xA1) or (c >= 0xE0 and c <= 0xFF); } int main(int argc, char ** argv) { //int strings = 0; for(int i = 0; i < argc; i++) { map.clear(); set.clear(); auto f = fopen(argv[i], "rb"); if(!f) continue; char magic[4]; int n = fread(magic, 1, 4, f); if(n != 4 or strncmp(magic, "TOB0", 4) != 0) continue; //puts(argv[i]); uint32_t header_size; uint32_t header_elements; fread(&header_size, 4, 1, f); fread(&header_elements, 4, 1, f); for(int j = 0; j < header_elements; j++) { uint8_t size; fread(&size, 1, 1, f); char * text = (char *)malloc(size-4); fread(text, 1, size-4, f); uint32_t value; fread(&value, 4, 1, f); map[value] = text; free(text); } if(ftell(f) != header_size+4) { puts("desynchronized"), puts(argv[i]); exit(0); } uint32_t table_size; uint32_t table_elements; fread(&table_size, 4, 1, f); fread(&table_elements, 4, 1, f); uint32_t table_end = table_size + header_size + 4; while(ftell(f) < table_end) { //printf("Header at %08X\n", ftell(f)); uint32_t word; fread(&word, 4, 1, f); set.insert(word); } fseek(f, table_end, SEEK_SET); std::string text = ""; uint32_t start = ftell(f); while(1) { uint32_t location = ftell(f); // I have absolutely no idea if these are correct. auto c = fgetc(f); bool closefile = feof(f) or ferror(f) or c < 0; if(closefile) { if(text != "") { printf("%s\n", text.data()); } break; } if(c == 0x5B) // STOP MAKING INHERENTLY ASCII INCOMPATIBLE FORMATS REEEEEEEEEEEEEEEEEEEEEEE { if(text != "") { //printf("%08X \"%s\"\n", start, text.data()); printf("%s\n", text.data()); } text = ""; start = ftell(f); } else if(c == 0x20) { fseek(f, 4, SEEK_CUR); continue; } else if(c == 0x01) { uint32_t length; fread(&length, 4, 1, f); if(length < 4) { printf("mayday in op 01 at %08X in %s\n", location, argv[i]); exit(0); } fseek(f, length-4, SEEK_CUR); continue; } else if(c == 0x03) { // I have no idea how this works. Don't blame me if it breaks. uint16_t unknown1; fread(&unknown1, 2, 1, f); if(unknown1 > 0xFF) { printf("mayday 1 in op 03 at %08X in %s\n", location, argv[i]); exit(0); } fseek(f, unknown1+1, SEEK_CUR); uint16_t unknown2; fread(&unknown2, 2, 1, f); if(unknown2 < 2) { printf("mayday 2 in op 03 at %08X in %s\n", location, argv[i]); exit(0); } unknown2 -= 2; fseek(f, unknown2-2, SEEK_CUR); uint32_t unknown3; fread(&unknown3, 4, 1, f); if(unknown3 < 4) { printf("mayday 3 in op 03 at %08X in %s\n", location, argv[i]); exit(0); } fseek(f, unknown3-4, SEEK_CUR); continue; } else if(c == 0x02) { // I have no idea how this works. Don't blame me if it breaks. fseek(f, 8, SEEK_CUR); uint32_t unknown1; fread(&unknown1, 4, 1, f); if(unknown1 < 4) { printf("mayday 2 in op 03 at %08X in %s\n", location, argv[i]); exit(0); } fseek(f, unknown1-4, SEEK_CUR); continue; } else if(c == 0x73) { uint32_t length; fread(&length, 4, 1, f); if(length > 0x0000FFFF) { printf("mayday 1 in op 73 at %08X in %s\n", location, argv[i]); exit(0); } fseek(f, length+1, SEEK_CUR); fread(&length, 4, 1, f); if(length < 4) { printf("mayday 2 in op 73 at %08X in %s\n", location, argv[i]); exit(0); } fseek(f, length-4, SEEK_CUR); continue; } else if(is_upper_jis_surrogate(c)) { bool intext = true; while(intext) { text += c; if(is_upper_jis_surrogate(c)) text += fgetc(f); c = fgetc(f); if(c == 0x5B) intext = false; } } else if(c == 0) { if(fgetc(f) >= 0) { printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]); exit(0); } else break; } else { printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]); //exit(0); break; } } fclose(f); } //printf("Total strings: %d\n", strings); //for(auto &[k, v] : examples) //{ // printf("%08X: %s\n", k, v.data()); //} } }}}