.sob files contain 32-bit word bytecode, packed with a lookup table in the header and a string table at the end. I have NO IDEA what the VN engine actually does to execute its logic; I only know how to get the text out of the scripts. I'm not 100% certain the extraction is correct, but it seems to work properly.
Oretsuba Prelude and Oretsuba seem to trigger dialogue text with different commands (0x000001C8 vs 0x000001CF). I'm not certain that this is correct, but it seems to work.
Useful regexes:
Dumper:
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdint.h>
4 #include <string.h>
5
6 #include <set>
7 #include <map>
8 #include <vector>
9
10 std::vector<std::map<uint32_t, uint32_t>> maps; // key -> value tables read from the header of the file being processed, in file order; cleared per input file, and main() only consults maps[4]
11
12 std::map<uint32_t, std::string> examples; // command word -> first string seen for that command; never cleared between files, only read by commented-out debug code
13
14 // when true, a string is printed at most once per input file, keyed by the offset looked up in maps[4] (see `seen`). NOTE(review): this comment used to promise deduplication of identical strings "even if they have different addresses", but `seen` stores addresses, so identical text at two addresses is still printed twice -- confirm which behaviour is intended
15 bool deduplciate = true; // (sic) name is misspelled; main() refers to it by this exact spelling
16 std::set<uint32_t> seen; // string offsets already printed for the current input file; cleared per input file
17
18 int main(int argc, char ** argv)
19 {
20 int strings = 0;
21 int totalspace = 0;
22 for(int i = 0; i < argc; i++)
23 {
24 maps.clear();
25 seen.clear();
26
27 auto f = fopen(argv[i], "rb");
28 if(!f) continue;
29 char magic[4];
30 int n = fread(magic, 1, 4, f);
31 if(n != 4 or strncmp(magic, "SOB0", 4) != 0) continue;
32 uint32_t table_size;
33 fread(&table_size, 4, 1, f);
34 uint32_t table_end = table_size + 0x8;
35 int header_count = 0;
36 //int j = 0;
37 while(ftell(f) < table_end)
38 {
39 //printf("Header %d at %08X:\n", j++, ftell(f));
40 uint32_t word_pairs;
41 fread(&word_pairs, 4, 1, f);
42 std::map<uint32_t, uint32_t> newmap;
43 for(int i = 0; i < word_pairs; i++)
44 {
45 uint32_t key;
46 uint32_t value;
47 fread(&key, 4, 1, f);
48 fread(&value, 4, 1, f);
49 newmap[key] = value;
50 //printf("%08X: %08X\n", key, value);
51 }
52 maps.push_back(newmap);
53 header_count += 1;
54 }
55 if(maps.size() < 5)
56 {
57 puts("not enough maps");
58 exit(0);
59 }
60 fseek(f, table_end, SEEK_SET);
61 uint32_t strings_start;
62 uint32_t strings_end;
63 fread(&strings_start, 4, 1, f);
64 fread(&strings_end, 4, 1, f);
65 totalspace += strings_end-strings_start;
66 uint32_t header_end = ftell(f);
67 uint32_t code_end = strings_start+header_end;
68 //printf("String range: %08X~%08X\n", strings_start, strings_end);
69
70 // can't tell if text is content text until a command tries to use it? I think?
71 std::string text_memory;
72 uint32_t text_memory_addr;
73
74 bool justdidtext = false;
75
76 // no idea if commands have proper inline arguments in this bytecode lol
77 while(ftell(f) < code_end)
78 {
79 uint32_t address = ftell(f)-header_end;
80 uint32_t command;
81 fread(&command, 4, 1, f);
82
83 if((command & 0xFFF00000) == 0x01800000 and maps[4].count(address) and maps[4][address] >= strings_start and maps[4][address] < strings_end)
84 {
85 //printf("Found command that uses string: ");
86 //printf("%08X: %08X (%08X) / ", address, command, maps[4][address], argv[i]);
87 //printf("|%08X|:(|%08X|) [%s]\n", address+header_end, maps[4][address]+header_end, argv[i]);
88 strings++;
89 fseek(f, maps[4][address]+header_end, SEEK_SET);
90 int c = fgetc(f);
91
92 if(c == 0) // 00 XX XX <string
93 fseek(f, 2, SEEK_CUR);
94 else // <string>
95 fseek(f, -1, SEEK_CUR);
96
97 auto start = ftell(f);
98 while((c = fgetc(f)) != 0);
99 auto end = ftell(f);
100 char * text = (char *)malloc(end-start);
101 fseek(f, start, SEEK_SET);
102 fread(text, 1, end-start, f);
103 std::string str = std::string(text);
104 if(!examples.count(command))
105 examples[command] = str;
106
107 //if(str != "")
108 // printf("%08X text %s\n", command, text);
109 //printf("%s\n", text);
110 //printf("%08X: %08X %s\n", address+header_end, command, text);
111
112 text_memory = str;
113 text_memory_addr = maps[4][address];
114
115 free(text);
116 justdidtext = true;
117 }
118 else
119 {
120 // I have NO CERTAINTY that this is correct
121 if((command == 0x000001C8 or command == 0x000001CF) and text_memory != "" and (!deduplciate or !seen.count(text_memory_addr)))
122 {
123 puts(text_memory.data());
124 if(deduplciate) seen.insert(text_memory_addr);
125 }
126
127 justdidtext = false;
128 }
129 fseek(f, address+header_end+4, SEEK_SET);
130 }
131
132 fclose(f);
133 }
134 //printf("Total space for strings: %d\n", totalspace);
135 //printf("Total strings: %d\n", strings);
136 //for(auto &[k, v] : examples)
137 //{
138 // printf("%08X: %s\n", k, v.data());
139 //}
140 }
Oretsuba Afterstory is only vaguely similar. It uses .tob files, some kind of braindamaged bytecode with inconsistent internal length handling, and badly terminated strings.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdint.h>
4 #include <string.h>
5
6 #include <set>
7 #include <map>
8
9 std::map<uint32_t, std::string> map; // resume table? -- header entries keyed by their trailing 32-bit value, mapped to the entry's text; refilled by main() for every input file and never read back
10 std::set<uint32_t> set; // resume table? -- the 32-bit words of the table that follows the header; refilled by main() for every input file and never read back
11
// Tells whether byte value `c` falls in 0x80..0xA1 or 0xE0..0xFF (both inclusive).
// Any other value, including a negative EOF result from fgetc, yields false.
// NOTE(review): standard Shift-JIS lead bytes are 0x81..0x9F and 0xE0..0xFC; the
// wider ranges here are presumably tuned to the game data -- confirm before narrowing.
bool is_upper_jis_surrogate(int c)
{
    const bool in_low_range = 0x80 <= c && c <= 0xA1;
    const bool in_high_range = 0xE0 <= c && c <= 0xFF;
    return in_low_range || in_high_range;
}
16
17 int main(int argc, char ** argv)
18 {
19 //int strings = 0;
20 for(int i = 0; i < argc; i++)
21 {
22 map.clear();
23 set.clear();
24
25 auto f = fopen(argv[i], "rb");
26 if(!f) continue;
27 char magic[4];
28 int n = fread(magic, 1, 4, f);
29 if(n != 4 or strncmp(magic, "TOB0", 4) != 0) continue;
30
31 //puts(argv[i]);
32
33 uint32_t header_size;
34 uint32_t header_elements;
35 fread(&header_size, 4, 1, f);
36 fread(&header_elements, 4, 1, f);
37
38 for(int j = 0; j < header_elements; j++)
39 {
40 uint8_t size;
41 fread(&size, 1, 1, f);
42
43 char * text = (char *)malloc(size-4);
44 fread(text, 1, size-4, f);
45
46 uint32_t value;
47 fread(&value, 4, 1, f);
48 map[value] = text;
49
50 free(text);
51 }
52 if(ftell(f) != header_size+4)
53 {
54 puts("desynchronized"), puts(argv[i]);
55 exit(0);
56 }
57
58
59 uint32_t table_size;
60 uint32_t table_elements;
61 fread(&table_size, 4, 1, f);
62 fread(&table_elements, 4, 1, f);
63 uint32_t table_end = table_size + header_size + 4;
64 while(ftell(f) < table_end)
65 {
66 //printf("Header at %08X\n", ftell(f));
67 uint32_t word;
68 fread(&word, 4, 1, f);
69 set.insert(word);
70 }
71
72 fseek(f, table_end, SEEK_SET);
73
74 std::string text = "";
75 uint32_t start = ftell(f);
76 while(1)
77 {
78 uint32_t location = ftell(f);
79
80 // I have absolutely no idea if these are correct.
81 auto c = fgetc(f);
82 bool closefile = feof(f) or ferror(f) or c < 0;
83 if(closefile)
84 {
85 if(text != "")
86 {
87 printf("%s\n", text.data());
88 }
89 break;
90 }
91 if(c == 0x5B) // STOP MAKING INHERENTLY ASCII INCOMPATIBLE FORMATS REEEEEEEEEEEEEEEEEEEEEEE
92 {
93 if(text != "")
94 {
95 //printf("%08X \"%s\"\n", start, text.data());
96 printf("%s\n", text.data());
97 }
98 text = "";
99
100 start = ftell(f);
101 }
102 else if(c == 0x20)
103 {
104 fseek(f, 4, SEEK_CUR);
105 continue;
106 }
107 else if(c == 0x01)
108 {
109 uint32_t length;
110 fread(&length, 4, 1, f);
111 if(length < 4)
112 {
113 printf("mayday in op 01 at %08X in %s\n", location, argv[i]);
114 exit(0);
115 }
116 fseek(f, length-4, SEEK_CUR);
117 continue;
118 }
119 else if(c == 0x03)
120 {
121 // I have no idea how this works. Don't blame me if it breaks.
122 uint16_t unknown1;
123 fread(&unknown1, 2, 1, f);
124 if(unknown1 > 0xFF)
125 {
126 printf("mayday 1 in op 03 at %08X in %s\n", location, argv[i]);
127 exit(0);
128 }
129 fseek(f, unknown1+1, SEEK_CUR);
130
131 uint16_t unknown2;
132 fread(&unknown2, 2, 1, f);
133 if(unknown2 < 2)
134 {
135 printf("mayday 2 in op 03 at %08X in %s\n", location, argv[i]);
136 exit(0);
137 }
138 unknown2 -= 2;
139 fseek(f, unknown2-2, SEEK_CUR);
140
141 uint32_t unknown3;
142 fread(&unknown3, 4, 1, f);
143 if(unknown3 < 4)
144 {
145 printf("mayday 3 in op 03 at %08X in %s\n", location, argv[i]);
146 exit(0);
147 }
148 fseek(f, unknown3-4, SEEK_CUR);
149 continue;
150 }
151 else if(c == 0x02)
152 {
153 // I have no idea how this works. Don't blame me if it breaks.
154 fseek(f, 8, SEEK_CUR);
155
156 uint32_t unknown1;
157 fread(&unknown1, 4, 1, f);
158 if(unknown1 < 4)
159 {
160 printf("mayday 2 in op 03 at %08X in %s\n", location, argv[i]);
161 exit(0);
162 }
163 fseek(f, unknown1-4, SEEK_CUR);
164 continue;
165 }
166 else if(c == 0x73)
167 {
168 uint32_t length;
169 fread(&length, 4, 1, f);
170 if(length > 0x0000FFFF)
171 {
172 printf("mayday 1 in op 73 at %08X in %s\n", location, argv[i]);
173 exit(0);
174 }
175 fseek(f, length+1, SEEK_CUR);
176 fread(&length, 4, 1, f);
177 if(length < 4)
178 {
179 printf("mayday 2 in op 73 at %08X in %s\n", location, argv[i]);
180 exit(0);
181 }
182 fseek(f, length-4, SEEK_CUR);
183 continue;
184 }
185 else if(is_upper_jis_surrogate(c))
186 {
187 bool intext = true;
188 while(intext)
189 {
190 text += c;
191 if(is_upper_jis_surrogate(c))
192 text += fgetc(f);
193 c = fgetc(f);
194 if(c == 0x5B)
195 intext = false;
196 }
197 }
198 else if(c == 0)
199 {
200 if(fgetc(f) >= 0)
201 {
202 printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]);
203 exit(0);
204 }
205 else
206 break;
207 }
208 else
209 {
210 printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]);
211 //exit(0);
212 break;
213 }
214 }
215
216 fclose(f);
217 }
218 //printf("Total strings: %d\n", strings);
219 //for(auto &[k, v] : examples)
220 //{
221 // printf("%08X: %s\n", k, v.data());
222 //}
223 }