Differences between revisions 4 and 8 (spanning 4 versions)
Revision 4 as of 2017-09-30 01:27:48
Size: 4953
Editor: weh
Comment:
Revision 8 as of 2017-09-30 05:41:07
Size: 9323
Editor: weh
Comment:
Deletions are marked like this. Additions are marked like this.
Line 4: Line 4:

Useful regexes:
{{{#!highlight
^#[^&]*&
<remove>

\$$
<remove>

<[-0-9]*>(\\)?
<remove>
}}}

Dumper:
Line 11: Line 25:
#include <set>
Line 17: Line 32:

//deduplicate strings within each scene by their string-table address (the same text stored at a different address is still printed again)
bool deduplciate = true;
std::set<uint32_t> seen;
Line 21: Line 40:
    int totalspace = 0;
Line 24: Line 44:
        seen.clear();
Line 34: Line 55:
        //int j = 0;
Line 36: Line 58:
            //printf("Header at %08X\n", ftell(f));             //printf("Header %d at %08X:\n", j++, ftell(f));
Line 47: Line 69:
                //printf("%08X: %08X\n", key, value);
Line 61: Line 84:
        totalspace += strings_end-strings_start;
Line 62: Line 86:
        uint32_t code_end = strings_start+header_end;
Line 66: Line 91:
        uint32_t text_memory_addr;
Line 70: Line 96:
        while(ftell(f) < strings_start)         while(ftell(f) < code_end)
Line 75: Line 101:
            if((command & 0xFFF00000) == 0x01800000)
            {
                if(maps[4].count(address) and maps[4][address] >= strings_start and maps[4][address] < strings_end)
                         if((command & 0xFFF00000) == 0x01800000 and maps[4].count(address) and maps[4][address] >= strings_start and maps[4][address] < strings_end)
            {
                //printf("Found command that uses string: ");
                //printf("%08X: %08X (%08X) / ", address, command, maps[4][address], argv[i]);
                //printf("|%08X|:(|%08X|) [%s]\n", address+header_end, maps[4][address]+header_end, argv[i]);
                strings++;
                fseek(f, maps[4][address]+header_end, SEEK_SET);
                int c = fgetc(f);
                
                if(c == 0) // 00 XX XX <string
                    fseek(f, 2, SEEK_CUR);
                else // <string>
                    fseek(f, -1, SEEK_CUR);
                
                auto start = ftell(f);
                while((c = fgetc(f)) != 0);
                auto end = ftell(f);
                char * text = (char *)malloc(end-start);
                fseek(f, start, SEEK_SET);
                fread(text, 1, end-start, f);
                std::string str = std::string(text);
                if(!examples.count(command))
                    examples[command] = str;
                
                //if(str != "")
                // printf("%08X text %s\n", command, text);
                    //printf("%s\n", text);
                    //printf("%08X: %08X %s\n", address+header_end, command, text);
                
                text_memory = str;
                text_memory_addr = maps[4][address];
                
                free(text);
                justdidtext = true;
            }
            else
            {
                // I have NO CERTAINTY that this is correct
                if((command == 0x000001C8 or command == 0x000001CF) and text_memory != "" and (!deduplciate or !seen.count(text_memory_addr)))
Line 79: Line 142:
                    //printf("Found command that uses string: ");
                    //printf("%08X: %08X (%08X) / ", address, command, maps[4][address], argv[i]);
                    //printf("|%08X|:(|%08X|) [%s]\n", address+header_end, maps[4][address]+header_end, argv[i]);
                    strings++;
                    fseek(f, maps[4][address]+header_end, SEEK_SET);
                    int c = fgetc(f);
                    
                    if(c == 0) // 00 XX XX <string
                        fseek(f, 2, SEEK_CUR);
                    else // <string>
                        fseek(f, -1, SEEK_CUR);
                    
                    auto start = ftell(f);
                    while((c = fgetc(f)) != 0);
                    auto end = ftell(f);
                    char * text = (char *)malloc(end-start);
                    fseek(f, start, SEEK_SET);
                    fread(text, 1, end-start, f);
                    std::string str = std::string(text);
                    if(!examples.count(command))
                        examples[command] = str;
                    
                    //if(str != "")
                    // printf("%08X text %s\n", command, text);
                        //printf("%08X: %08X %s\n", address+header_end, command, text);
                    
                    text_memory = text;
                    
                    free(text);
                    justdidtext = true;
                    puts(text_memory.data());
                    if(deduplciate) seen.insert(text_memory_addr);
Line 110: Line 145:
                                 justdidtext = false;
            }
            fseek(f, address+header_end+4, SEEK_SET);
        }
        
        fclose(f);
    }
    //printf("Total space for strings: %d\n", totalspace);
    //printf("Total strings: %d\n", strings);
    //for(auto &[k, v] : examples)
    //{
    // printf("%08X: %s\n", k, v.data());
    //}
}
}}}

Oretsuba Afterstory is only vaguely similar. It uses .tob files, some kind of braindamaged bytecode, and badly terminated inline strings.

{{{#!highlight c++
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#include <set>
#include <map>

std::map<uint32_t, std::string> map; // resume table?
std::set<uint32_t> set; // resume table?

// Reports whether the byte value c lies in one of the two ranges this dumper
// treats as the first byte of a two-byte character: 0x80..0xA1 or 0xE0..0xFF.
// Any value outside 0x80..0xFF (including a negative EOF result) yields false.
// NOTE(review): Shift-JIS lead bytes are usually documented as 0x81..0x9F and
// 0xE0..0xFC; the wider ranges here are presumably deliberate -- confirm.
bool is_upper_jis_surrogate(int c)
{
    if(c < 0x80 or c > 0xFF)
        return false;
    // Within 0x80..0xFF only the gap 0xA2..0xDF is excluded.
    return not (c >= 0xA2 and c <= 0xDF);
}

int main(int argc, char ** argv)
{
    //int strings = 0;
    for(int i = 0; i < argc; i++)
    {
        map.clear();
        set.clear();
        
        auto f = fopen(argv[i], "rb");
        if(!f) continue;
        char magic[4];
        int n = fread(magic, 1, 4, f);
        if(n != 4 or strncmp(magic, "TOB0", 4) != 0) continue;
        uint32_t header_size;
        uint32_t header_elements;
        fread(&header_size, 4, 1, f);
        fread(&header_elements, 4, 1, f);
        
        for(int j = 0; j < header_elements; j++)
        {
            uint8_t size;
            fread(&size, 1, 1, f);
            
            char * text = (char *)malloc(size-4);
            fread(text, 1, size-4, f);
            
            uint32_t value;
            fread(&value, 4, 1, f);
            map[value] = text;
            
            free(text);
        }
        if(ftell(f) != header_size+4)
        {
            puts("desynchronized"), puts(argv[i]);
            exit(0);
        }
        
        
        uint32_t table_size;
        uint32_t table_elements;
        fread(&table_size, 4, 1, f);
        fread(&table_elements, 4, 1, f);
        uint32_t table_end = table_size + header_size + 4;
        while(ftell(f) < table_end)
        {
            //printf("Header at %08X\n", ftell(f));
            uint32_t word;
            fread(&word, 4, 1, f);
            set.insert(word);
        }
        
        fseek(f, table_end, SEEK_SET);
        
        std::string text = "";
        //uint32_t start = ftell(f);
        while(1)
        {
            auto c = fgetc(f);
            bool closefile = feof(f) or ferror(f) or c < 0;
            if(closefile)
            {
                if(text != "")
                    printf("%s\n", text.data());
                break;
            }
            if(c == 0x5B) // STOP MAKING INHERENTLY ASCII INCOMPATIBLE FORMATS REEEEEEEEEEEEEEEEEEEEEEE
            {
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                c = fgetc(f);
                
                uint32_t skip;
                int n = fread(&skip, 4, 1, f);
                fseek(f, -4, SEEK_CUR);
                fseek(f, skip, SEEK_CUR);
                
                if(n < 1)
                {
                    if(text != "")
                        printf("%s\n", text.data());
                    break;
                }
                
                if(text != "")
                {
                    //printf("%08X \"%s\"\n", start, text.data());
                    printf("%s\n", text.data());
                }
                text = "";
                
                //start = ftell(f);
Line 113: Line 279:
                //printf("%08X\n", command);
                
                // I have NO CERTAINTY that this is correct
                if(command == 0x000001C8 or command == 0x000001CF and text_memory != "")
                    puts(text_memory.data());
                
                justdidtext = false;
            }
            fseek(f, address+header_end+4, SEEK_SET);
                text += c;
                if(is_upper_jis_surrogate(c))
                    text += fgetc(f);
            }

.sob files: 32-bit word bytecode packed with a lookup table in the header and a string table at the end. I have NO IDEA what the VN engine actually does to execute its logic, only how to get the text out of the scripts. I'm not 100% certain the extraction is correct, but it seems to work properly.

Oretsuba Prelude and Oretsuba seem to trigger dialogue text with different commands (0x000001C8 vs 0x000001CF). I'm not certain that this is correct, but it seems to work.

Useful regexes:

   1 ^#[^&]*&
   2 <remove>
   3 
   4 \$$
   5 <remove>
   6 
   7 <[-0-9]*>(\\)?
   8 <remove>

Dumper:

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <stdint.h>
   4 #include <string.h>
   5 
   6 #include <set>
   7 #include <map>
   8 #include <vector>
   9 
  10 std::vector<std::map<uint32_t, uint32_t>> maps;
  11 
  12 std::map<uint32_t, std::string> examples;
  13 
  14 //deduplicate strings within each scene by their string-table address (the same text stored at a different address is still printed again)
  15 bool deduplciate = true;
  16 std::set<uint32_t> seen;
  17 
  18 int main(int argc, char ** argv)
  19 {
  20     int strings = 0;
  21     int totalspace = 0;
  22     for(int i = 0; i < argc; i++)
  23     {
  24         maps.clear();
  25         seen.clear();
  26         
  27         auto f = fopen(argv[i], "rb");
  28         if(!f) continue;
  29         char magic[4];
  30         int n = fread(magic, 1, 4, f);
  31         if(n != 4 or strncmp(magic, "SOB0", 4) != 0) continue;
  32         uint32_t table_size;
  33         fread(&table_size, 4, 1, f);
  34         uint32_t table_end = table_size + 0x8;
  35         int header_count = 0;
  36         //int j = 0;
  37         while(ftell(f) < table_end)
  38         {
  39             //printf("Header %d at %08X:\n", j++, ftell(f));
  40             uint32_t word_pairs;
  41             fread(&word_pairs, 4, 1, f);
  42             std::map<uint32_t, uint32_t> newmap;
  43             for(int i = 0; i < word_pairs; i++)
  44             {
  45                 uint32_t key;
  46                 uint32_t value;
  47                 fread(&key, 4, 1, f);
  48                 fread(&value, 4, 1, f);
  49                 newmap[key] = value;
  50                 //printf("%08X: %08X\n", key, value);
  51             }
  52             maps.push_back(newmap);
  53             header_count += 1;
  54         }
  55         if(maps.size() < 5)
  56         {
  57             puts("not enough maps");
  58             exit(0);
  59         }
  60         fseek(f, table_end, SEEK_SET);
  61         uint32_t strings_start;
  62         uint32_t strings_end;
  63         fread(&strings_start, 4, 1, f);
  64         fread(&strings_end, 4, 1, f);
  65         totalspace += strings_end-strings_start;
  66         uint32_t header_end = ftell(f);
  67         uint32_t code_end = strings_start+header_end;
  68         //printf("String range: %08X~%08X\n", strings_start, strings_end);
  69         
  70         // can't tell if text is content text until a command tries to use it? I think?
  71         std::string text_memory;
  72         uint32_t text_memory_addr;
  73         
  74         bool justdidtext = false;
  75         
  76         // no idea if commands have proper inline arguments in this bytecode lol
  77         while(ftell(f) < code_end)
  78         {
  79             uint32_t address = ftell(f)-header_end;
  80             uint32_t command;
  81             fread(&command, 4, 1, f);
  82             
  83             if((command & 0xFFF00000) == 0x01800000 and maps[4].count(address) and maps[4][address] >= strings_start and maps[4][address] < strings_end)
  84             {
  85                 //printf("Found command that uses string: ");
  86                 //printf("%08X: %08X (%08X) / ", address, command, maps[4][address], argv[i]);
  87                 //printf("|%08X|:(|%08X|) [%s]\n", address+header_end, maps[4][address]+header_end, argv[i]);
  88                 strings++;
  89                 fseek(f, maps[4][address]+header_end, SEEK_SET);
  90                 int c = fgetc(f);
  91                 
  92                 if(c == 0) // 00 XX XX <string
  93                     fseek(f, 2, SEEK_CUR);
  94                 else // <string>
  95                     fseek(f, -1, SEEK_CUR);
  96                 
  97                 auto start = ftell(f);
  98                 while((c = fgetc(f)) != 0);
  99                 auto end = ftell(f);
 100                 char * text = (char *)malloc(end-start);
 101                 fseek(f, start, SEEK_SET);
 102                 fread(text, 1, end-start, f);
 103                 std::string str = std::string(text);
 104                 if(!examples.count(command))
 105                     examples[command] = str;
 106                 
 107                 //if(str != "")
 108                 //    printf("%08X text %s\n", command, text);
 109                     //printf("%s\n", text);
 110                     //printf("%08X: %08X %s\n", address+header_end, command, text);
 111                 
 112                 text_memory = str;
 113                 text_memory_addr = maps[4][address];
 114                 
 115                 free(text);
 116                 justdidtext = true;
 117             }
 118             else
 119             {
 120                 // I have NO CERTAINTY that this is correct
 121                 if((command == 0x000001C8 or command == 0x000001CF) and text_memory != "" and (!deduplciate or !seen.count(text_memory_addr)))
 122                 {
 123                     puts(text_memory.data());
 124                     if(deduplciate) seen.insert(text_memory_addr);
 125                 }
 126                 
 127                 justdidtext = false;
 128             }
 129             fseek(f, address+header_end+4, SEEK_SET);
 130         }
 131         
 132         fclose(f);
 133     }
 134     //printf("Total space for strings: %d\n", totalspace);
 135     //printf("Total strings: %d\n", strings);
 136     //for(auto &[k, v] : examples)
 137     //{
 138     //    printf("%08X: %s\n", k, v.data());
 139     //}
 140 }

Oretsuba Afterstory is only vaguely similar. It uses .tob files, some kind of braindamaged bytecode, and badly terminated inline strings.

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <stdint.h>
   4 #include <string.h>
   5 
   6 #include <set>
   7 #include <map>
   8 
   9 std::map<uint32_t, std::string> map; // resume table?
  10 std::set<uint32_t> set; // resume table?
  11 
  12 bool is_upper_jis_surrogate(int c)
  13 {
  14     return (c >= 0x80 and c <= 0xA1) or (c >= 0xE0 and c <= 0xFF);
  15 }
  16 
  17 int main(int argc, char ** argv)
  18 {
  19     //int strings = 0;
  20     for(int i = 0; i < argc; i++)
  21     {
  22         map.clear();
  23         set.clear();
  24         
  25         auto f = fopen(argv[i], "rb");
  26         if(!f) continue;
  27         char magic[4];
  28         int n = fread(magic, 1, 4, f);
  29         if(n != 4 or strncmp(magic, "TOB0", 4) != 0) continue;
  30         uint32_t header_size;
  31         uint32_t header_elements;
  32         fread(&header_size, 4, 1, f);
  33         fread(&header_elements, 4, 1, f);
  34         
  35         for(int j = 0; j < header_elements; j++)
  36         {
  37             uint8_t size;
  38             fread(&size, 1, 1, f);
  39             
  40             char * text = (char *)malloc(size-4);
  41             fread(text, 1, size-4, f);
  42             
  43             uint32_t value;
  44             fread(&value, 4, 1, f);
  45             map[value] = text;
  46             
  47             free(text);
  48         }
  49         if(ftell(f) != header_size+4)
  50         {
  51             puts("desynchronized"), puts(argv[i]);
  52             exit(0);
  53         }
  54         
  55         
  56         uint32_t table_size;
  57         uint32_t table_elements;
  58         fread(&table_size, 4, 1, f);
  59         fread(&table_elements, 4, 1, f);
  60         uint32_t table_end = table_size + header_size + 4;
  61         while(ftell(f) < table_end)
  62         {
  63             //printf("Header at %08X\n", ftell(f));
  64             uint32_t word;
  65             fread(&word, 4, 1, f);
  66             set.insert(word);
  67         }
  68         
  69         fseek(f, table_end, SEEK_SET);
  70         
  71         std::string text = "";
  72         //uint32_t start = ftell(f);
  73         while(1)
  74         {
  75             auto c = fgetc(f);
  76             bool closefile = feof(f) or ferror(f) or c < 0;
  77             if(closefile)
  78             {
  79                 if(text != "")
  80                     printf("%s\n", text.data());
  81                 break;
  82             }
  83             if(c == 0x5B) // STOP MAKING INHERENTLY ASCII INCOMPATIBLE FORMATS REEEEEEEEEEEEEEEEEEEEEEE
  84             {
  85                 c = fgetc(f);
  86                 c = fgetc(f);
  87                 c = fgetc(f);
  88                 c = fgetc(f);
  89                 c = fgetc(f);
  90                 c = fgetc(f);
  91                 
  92                 uint32_t skip;
  93                 int n = fread(&skip, 4, 1, f);
  94                 fseek(f, -4, SEEK_CUR);
  95                 fseek(f, skip, SEEK_CUR);
  96                 
  97                 if(n < 1)
  98                 {
  99                     if(text != "")
 100                         printf("%s\n", text.data());
 101                     break;
 102                 }
 103                 
 104                 if(text != "")
 105                 {
 106                     //printf("%08X \"%s\"\n", start, text.data());
 107                     printf("%s\n", text.data());
 108                 }
 109                 text = "";
 110                 
 111                 //start = ftell(f);
 112             }
 113             else
 114             {
 115                 text += c;
 116                 if(is_upper_jis_surrogate(c))
 117                     text += fgetc(f);
 118             }
 119         }
 120         
 121         fclose(f);
 122     }
 123     //printf("Total strings: %d\n", strings);
 124     //for(auto &[k, v] : examples)
 125     //{
 126     //    printf("%08X: %s\n", k, v.data());
 127     //}
 128 }

Lucifen Library (last edited 2017-10-01 11:21:51 by weh)