Differences between revisions 9 and 10
Revision 9 as of 2017-09-30 06:31:05
Size: 9371
Editor: weh
Comment:
Revision 10 as of 2017-10-01 11:21:51
Size: 12661
Editor: weh
Comment:
Deletions are marked like this. Additions are marked like this.
Line 162: Line 162:
Oretsuba Afterstory is only vaguely similar. It uses .tob files, some kind of braindamaged bytecode, --(and badly terminated inline strings)--.

'''''this is broken, don't use it'''''
Oretsuba Afterstory is only vaguely similar. It uses .tob files, some kind of braindamaged bytecode with inconsistent internal length handling, and badly terminated strings.
Line 196: Line 194:
                 //puts(argv[i]);
        
Line 238: Line 239:
        //uint32_t start = ftell(f);         uint32_t start = ftell(f);
Line 241: Line 242:
            uint32_t location = ftell(f);
            
            // I have absolutely no idea if these are correct.
Line 246: Line 250:
                {
Line 247: Line 252:
                }
Line 251: Line 257:
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                
                uint32_t skip;
                int n = fread(&skip, 4, 1, f);
                fseek(f, -4, SEEK_CUR);
                fseek(f, skip, SEEK_CUR);
                
                if(n < 1)
                {
                    if(text != "")
                        printf("%s\n", text.data());
                    break;
                }
                
Line 277: Line 264:
                //start = ftell(f);                 start = ftell(f);
            }
            else if(c == 0x20)
            {
                fseek(f, 4, SEEK_CUR);
                continue;
            }
            else if(c == 0x01)
            {
                uint32_t length;
                fread(&length, 4, 1, f);
                if(length < 4)
                {
                    printf("mayday in op 01 at %08X in %s\n", location, argv[i]);
                    exit(0);
                }
                fseek(f, length-4, SEEK_CUR);
                continue;
            }
            else if(c == 0x03)
            {
                // I have no idea how this works. Don't blame me if it breaks.
                uint16_t unknown1;
                fread(&unknown1, 2, 1, f);
                if(unknown1 > 0xFF)
                {
                    printf("mayday 1 in op 03 at %08X in %s\n", location, argv[i]);
                    exit(0);
                }
                fseek(f, unknown1+1, SEEK_CUR);
                
                uint16_t unknown2;
                fread(&unknown2, 2, 1, f);
                if(unknown2 < 2)
                {
                    printf("mayday 2 in op 03 at %08X in %s\n", location, argv[i]);
                    exit(0);
                }
                unknown2 -= 2;
                fseek(f, unknown2-2, SEEK_CUR);
                
                uint32_t unknown3;
                fread(&unknown3, 4, 1, f);
                if(unknown3 < 4)
                {
                    printf("mayday 3 in op 03 at %08X in %s\n", location, argv[i]);
                    exit(0);
                }
                fseek(f, unknown3-4, SEEK_CUR);
                continue;
            }
            else if(c == 0x02)
            {
                // I have no idea how this works. Don't blame me if it breaks.
                fseek(f, 8, SEEK_CUR);
                
                uint32_t unknown1;
                fread(&unknown1, 4, 1, f);
                if(unknown1 < 4)
                {
                    printf("mayday 2 in op 03 at %08X in %s\n", location, argv[i]);
                    exit(0);
                }
                fseek(f, unknown1-4, SEEK_CUR);
                continue;
            }
            else if(c == 0x73)
            {
                uint32_t length;
                fread(&length, 4, 1, f);
                if(length > 0x0000FFFF)
                {
                    printf("mayday 1 in op 73 at %08X in %s\n", location, argv[i]);
                    exit(0);
                }
                fseek(f, length+1, SEEK_CUR);
                fread(&length, 4, 1, f);
                if(length < 4)
                {
                    printf("mayday 2 in op 73 at %08X in %s\n", location, argv[i]);
                    exit(0);
                }
                fseek(f, length-4, SEEK_CUR);
                continue;
            }
            else if(is_upper_jis_surrogate(c))
            {
                bool intext = true;
                while(intext)
                {
                    text += c;
                    if(is_upper_jis_surrogate(c))
                        text += fgetc(f);
                    c = fgetc(f);
                    if(c == 0x5B)
                        intext = false;
                }
            }
            else if(c == 0)
            {
                if(fgetc(f) >= 0)
                {
                    printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]);
                    exit(0);
                }
                else
                    break;
Line 281: Line 374:
                text += c;
                if(is_upper_jis_surrogate(c))
                    text += fgetc(f);
                printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]);
                //exit(0);
                break;

.sob files contain 32-bit word bytecode, packed with a lookup table in the header and a string table at the end. I have NO IDEA what the VN engine actually does to execute its logic, only how to get the text out of the scripts. I'm not 100% certain the extraction is correct, but it seems to work properly.

Oretsuba Prelude and Oretsuba seem to trigger dialogue text with different commands (0x000001C8 vs 0x000001CF). I'm not certain that this is correct, but it seems to work.

Useful regexes:

   1 ^#[^&]*&
   2 <remove>
   3 
   4 \$$
   5 <remove>
   6 
   7 <[-0-9]*>(\\)?
   8 <remove>

Dumper:

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <stdint.h>
   4 #include <string.h>
   5 
   6 #include <set>
   7 #include <map>
   8 #include <vector>
   9 
  10 std::vector<std::map<uint32_t, uint32_t>> maps; // lookup tables read from the current file's header, in file order; cleared for every file
  11 
  12 std::map<uint32_t, std::string> examples; // first string seen for each string-loading command word; filled by main() but not read by any active code
  13 
  14 // when true, a string is printed at most once per file; repeats are detected by string-table address (see `seen`), not by comparing contents
  15 bool deduplciate = true; // NOTE(review): the name is a typo of "deduplicate"; main() refers to it by this spelling
  16 std::set<uint32_t> seen; // string-table addresses already printed for the current file; cleared for every file
  17 
  18 // Dumps dialogue strings from every SOB0 script named on the command line, one per line on stdout.
  19 // Files that cannot be opened, lack the "SOB0" magic, or end before the string-range fields are skipped.
  20 // A file with fewer than five header tables aborts the whole run with "not enough maps".
  21 int main(int argc, char ** argv)
  22 {
  23     // argv[0] is the program itself, so input files start at index 1.
  24     for(int i = 1; i < argc; i++)
  25     {
  26         maps.clear();
  27         seen.clear();
  28 
  29         auto f = fopen(argv[i], "rb");
  30         if(!f) continue;
  31 
  32         char magic[4];
  33         uint32_t table_size = 0;
  34         if(fread(magic, 1, 4, f) != 4 or strncmp(magic, "SOB0", 4) != 0 or fread(&table_size, 4, 1, f) != 1)
  35         {
  36             fclose(f); // skipped files must not leak their handle
  37             continue;
  38         }
  39         uint32_t table_end = table_size + 0x8;
  40 
  41         // Header: consecutive lookup tables, each a 32-bit pair count followed by that many key/value pairs.
  42         bool truncated = false;
  43         while(!truncated and ftell(f) < table_end)
  44         {
  45             uint32_t word_pairs = 0;
  46             std::map<uint32_t, uint32_t> newmap;
  47             truncated = fread(&word_pairs, 4, 1, f) != 1;
  48             for(uint32_t pair = 0; !truncated and pair < word_pairs; pair++)
  49             {
  50                 uint32_t key = 0;
  51                 uint32_t value = 0;
  52                 truncated = fread(&key, 4, 1, f) != 1 or fread(&value, 4, 1, f) != 1;
  53                 if(!truncated) newmap[key] = value;
  54             }
  55             // A failed read never advances the file position, so stop here instead of looping forever.
  56             if(!truncated) maps.push_back(newmap);
  57         }
  58         if(maps.size() < 5)
  59         {
  60             puts("not enough maps");
  61             exit(0);
  62         }
  63 
  64         // The string range follows the header; both bounds are offsets relative to header_end.
  65         fseek(f, table_end, SEEK_SET);
  66         uint32_t strings_start = 0;
  67         uint32_t strings_end = 0;
  68         if(fread(&strings_start, 4, 1, f) != 1 or fread(&strings_end, 4, 1, f) != 1)
  69         {
  70             fclose(f);
  71             continue;
  72         }
  73         uint32_t header_end = ftell(f);
  74         uint32_t code_end = strings_start+header_end;
  75 
  76         // can't tell if text is content text until a command tries to use it? I think?
  77         // So remember the most recently loaded string and print it only when a dialogue command follows.
  78         std::string text_memory;
  79         uint32_t text_memory_addr = 0;
  80 
  81         // no idea if commands have proper inline arguments in this bytecode, so every 32-bit word is tried as a command
  82         while(ftell(f) < code_end)
  83         {
  84             uint32_t address = ftell(f)-header_end;
  85             uint32_t command = 0;
  86             if(fread(&command, 4, 1, f) != 1) break; // file ends before the declared end of the code
  87 
  88             // The fifth header table maps a command's code offset to the string-table offset it refers to.
  89             auto string_ref = maps[4].find(address);
  90             bool loads_string = (command & 0xFFF00000) == 0x01800000
  91                 and string_ref != maps[4].end()
  92                 and string_ref->second >= strings_start
  93                 and string_ref->second < strings_end;
  94 
  95             if(loads_string)
  96             {
  97                 uint32_t string_addr = string_ref->second;
  98                 fseek(f, string_addr+header_end, SEEK_SET);
  99                 int c = fgetc(f);
 100                 if(c == 0) // 00 XX XX <string>: skip the two bytes after the leading zero
 101                 {
 102                     fseek(f, 2, SEEK_CUR);
 103                     c = fgetc(f);
 104                 }
 105 
 106                 // Collect bytes up to the NUL terminator; also stop at EOF so an unterminated string cannot hang the loop.
 107                 std::string str;
 108                 while(c != 0 and c != EOF)
 109                 {
 110                     str += (char)c;
 111                     c = fgetc(f);
 112                 }
 113 
 114                 if(!examples.count(command))
 115                     examples[command] = str;
 116 
 117                 text_memory = str;
 118                 text_memory_addr = string_addr;
 119             }
 120             // I have NO CERTAINTY that this is correct
 121             else if((command == 0x000001C8 or command == 0x000001CF) and text_memory != "" and (!deduplciate or !seen.count(text_memory_addr)))
 122             {
 123                 puts(text_memory.data());
 124                 if(deduplciate) seen.insert(text_memory_addr);
 125             }
 126 
 127             // Reading a string moved the file position; return to the word after this command.
 128             fseek(f, address+header_end+4, SEEK_SET);
 129         }
 130 
 131         fclose(f);
 132     }
 133 }

Oretsuba Afterstory is only vaguely similar. It uses .tob files, some kind of braindamaged bytecode with inconsistent internal length handling, and badly terminated strings.

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <stdint.h>
   4 #include <string.h>
   5 
   6 #include <set>
   7 #include <map>
   8 
   9 std::map<uint32_t, std::string> map; // header entries: each entry's trailing 32-bit value -> the entry's text; filled by main() but never read; purpose unconfirmed (resume table?)
  10 std::set<uint32_t> set; // 32-bit words of the table that follows the header; filled by main() but never read; purpose unconfirmed (resume table?)
  11 
  12 bool is_upper_jis_surrogate(int c) // reports whether byte value c is treated as the first byte of a two-byte character
  13 {
  14     return !(c < 0x80 || (c > 0xA1 && c < 0xE0) || c > 0xFF); // accepts 0x80..0xA1 and 0xE0..0xFF; NOTE(review): standard Shift-JIS lead bytes are 0x81..0x9F and 0xE0..0xFC - confirm the wider ranges are intended
  15 }
  16 
  17 // Dumps the text found in every TOB0 script named on the command line, one string per line on stdout.
  18 // Files that cannot be opened or lack the "TOB0" magic are skipped; a header that does not parse to
  19 // the declared size, or an opcode that fails its sanity check, prints a diagnostic and ends the run.
  20 int main(int argc, char ** argv)
  21 {
  22     // argv[0] is the program itself, so input files start at index 1.
  23     for(int i = 1; i < argc; i++)
  24     {
  25         map.clear();
  26         set.clear();
  27 
  28         auto f = fopen(argv[i], "rb");
  29         if(!f) continue;
  30 
  31         char magic[4];
  32         if(fread(magic, 1, 4, f) != 4 or strncmp(magic, "TOB0", 4) != 0)
  33         {
  34             fclose(f); // skipped files must not leak their handle
  35             continue;
  36         }
  37 
  38         uint32_t header_size = 0;
  39         uint32_t header_elements = 0;
  40         bool synced = fread(&header_size, 4, 1, f) == 1 and fread(&header_elements, 4, 1, f) == 1;
  41 
  42         // Each header entry is one size byte, (size-4) bytes of text, then a 32-bit value.
  43         for(uint32_t j = 0; synced and j < header_elements; j++)
  44         {
  45             uint8_t size = 0;
  46             synced = fread(&size, 1, 1, f) == 1 and size >= 4; // size < 4 would make the text length negative
  47             if(!synced) break;
  48 
  49             std::string entry((size_t)size - 4, '\0');
  50             uint32_t value = 0;
  51             synced = fread(&entry[0], 1, entry.size(), f) == entry.size() and fread(&value, 4, 1, f) == 1;
  52             if(!synced) break;
  53 
  54             // c_str() is always NUL-terminated, so this keeps the text up to its first NUL without reading past the buffer.
  55             map[value] = entry.c_str();
  56         }
  57         if(!synced or ftell(f) != header_size+4)
  58         {
  59             puts("desynchronized"), puts(argv[i]);
  60             exit(0);
  61         }
  62 
  63         // Second table: a size and an element count, then 32-bit words up to table_end.
  64         uint32_t table_size = 0;
  65         uint32_t table_elements = 0; // read only to step over the field
  66         if(fread(&table_size, 4, 1, f) != 1 or fread(&table_elements, 4, 1, f) != 1)
  67         {
  68             fclose(f);
  69             continue;
  70         }
  71         uint32_t table_end = table_size + header_size + 4;
  72         while(ftell(f) < table_end)
  73         {
  74             uint32_t word = 0;
  75             if(fread(&word, 4, 1, f) != 1) break; // a failed read never advances the position; stop instead of spinning
  76             set.insert(word);
  77         }
  78 
  79         fseek(f, table_end, SEEK_SET);
  80 
  81         // Text accumulated so far; it is printed when a 0x5B byte or the end of the file is reached.
  82         std::string text = "";
  83         while(1)
  84         {
  85             uint32_t location = ftell(f);
  86 
  87             // I have absolutely no idea if these are correct.
  88             auto c = fgetc(f);
  89             bool closefile = feof(f) or ferror(f) or c < 0;
  90             if(closefile)
  91             {
  92                 if(text != "")
  93                 {
  94                     printf("%s\n", text.data());
  95                 }
  96                 break;
  97             }
  98             if(c == 0x5B) // '[' flushes pending text, even though 0x5B can also occur inside ASCII-range data
  99             {
 100                 if(text != "")
 101                 {
 102                     printf("%s\n", text.data());
 103                 }
 104                 text = "";
 105             }
 106             else if(c == 0x20)
 107             {
 108                 fseek(f, 4, SEEK_CUR);
 109             }
 110             else if(c == 0x01)
 111             {
 112                 // The 32-bit length counts its own four bytes.
 113                 uint32_t length = 0;
 114                 fread(&length, 4, 1, f);
 115                 if(length < 4)
 116                 {
 117                     printf("mayday in op 01 at %08X in %s\n", location, argv[i]);
 118                     exit(0);
 119                 }
 120                 fseek(f, length-4, SEEK_CUR);
 121             }
 122             else if(c == 0x03)
 123             {
 124                 // I have no idea how this works. Don't blame me if it breaks.
 125                 uint16_t unknown1 = 0;
 126                 fread(&unknown1, 2, 1, f);
 127                 if(unknown1 > 0xFF)
 128                 {
 129                     printf("mayday 1 in op 03 at %08X in %s\n", location, argv[i]);
 130                     exit(0);
 131                 }
 132                 fseek(f, unknown1+1, SEEK_CUR);
 133 
 134                 uint16_t unknown2 = 0;
 135                 fread(&unknown2, 2, 1, f);
 136                 if(unknown2 < 2)
 137                 {
 138                     printf("mayday 2 in op 03 at %08X in %s\n", location, argv[i]);
 139                     exit(0);
 140                 }
 141                 // NOTE(review): the net skip is unknown2-4, which seeks backwards when unknown2 is 2 or 3 - confirm against real files.
 142                 unknown2 -= 2;
 143                 fseek(f, unknown2-2, SEEK_CUR);
 144 
 145                 uint32_t unknown3 = 0;
 146                 fread(&unknown3, 4, 1, f);
 147                 if(unknown3 < 4)
 148                 {
 149                     printf("mayday 3 in op 03 at %08X in %s\n", location, argv[i]);
 150                     exit(0);
 151                 }
 152                 fseek(f, unknown3-4, SEEK_CUR);
 153             }
 154             else if(c == 0x02)
 155             {
 156                 // I have no idea how this works. Don't blame me if it breaks.
 157                 fseek(f, 8, SEEK_CUR);
 158 
 159                 uint32_t unknown1 = 0;
 160                 fread(&unknown1, 4, 1, f);
 161                 if(unknown1 < 4)
 162                 {
 163                     // This message used to say "op 03", which pointed at the wrong opcode.
 164                     printf("mayday in op 02 at %08X in %s\n", location, argv[i]);
 165                     exit(0);
 166                 }
 167                 fseek(f, unknown1-4, SEEK_CUR);
 168             }
 169             else if(c == 0x73)
 170             {
 171                 uint32_t length = 0;
 172                 fread(&length, 4, 1, f);
 173                 if(length > 0x0000FFFF)
 174                 {
 175                     printf("mayday 1 in op 73 at %08X in %s\n", location, argv[i]);
 176                     exit(0);
 177                 }
 178                 fseek(f, length+1, SEEK_CUR);
 179                 length = 0; // so a failed second read trips the check below instead of reusing the first length
 180                 fread(&length, 4, 1, f);
 181                 if(length < 4)
 182                 {
 183                     printf("mayday 2 in op 73 at %08X in %s\n", location, argv[i]);
 184                     exit(0);
 185                 }
 186                 fseek(f, length-4, SEEK_CUR);
 187             }
 188             else if(is_upper_jis_surrogate(c))
 189             {
 190                 // Append bytes until the 0x5B terminator (consumed here, not stored).
 191                 // EOF also ends the run, so an unterminated string cannot loop forever;
 192                 // the next pass of the outer loop then prints whatever was collected.
 193                 while(c >= 0 and c != 0x5B)
 194                 {
 195                     text += (char)c;
 196                     if(is_upper_jis_surrogate(c))
 197                     {
 198                         int trail = fgetc(f); // second byte of a two-byte character
 199                         if(trail < 0) break;
 200                         text += (char)trail;
 201                     }
 202                     c = fgetc(f);
 203                 }
 204             }
 205             else if(c == 0)
 206             {
 207                 // A zero byte is only accepted as the very last byte of the file.
 208                 if(fgetc(f) >= 0)
 209                 {
 210                     printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]);
 211                     exit(0);
 212                 }
 213                 else
 214                     break;
 215             }
 216             else
 217             {
 218                 // Unknown opcode: report it and give up on this file only.
 219                 printf("unknown operation %02X at %08X in %s\n", c, location, argv[i]);
 220                 break;
 221             }
 222         }
 223 
 224         fclose(f);
 225     }
 226 }

Lucifen Library (last edited 2017-10-01 11:21:51 by weh)