[[Stats]] for random anime subtitles. Just what was easy to dump. ''No quality guarantees''. Not going to be maintained at all. Just for fun. 12-episode anime series have extremely small amounts of text -- 100 to 200 KB in utf-8 -- so any analysis of them is going to be extremely unstable. For reference, Hanahira is about 180 KB in utf-8. For example, Fractale is nowhere near as "hard" as the 95% metric implies it is, it just has extremely little text. Also, it's not possible to reliably reconstruct original linebreaks from most subtitles, which is something the "custom metric" desperately needs, so it's disabled here, as are the chars per line/sentence measures. Similarly, anime takes runtime hours, not reading hours, so the hours estimate has been disabled too. '''There is a graph at the bottom of the page.''' {{{#!nutable script name; kanji\n(unique); kanji\n(2+); lines; sentences; characters; lexemes; sjis bytes; sjis (dedup); Hayashi; custom\nmetric b; freqlist\n90% Target; freqlist\n92.5% Target; freqlist\n95% Target akatsuki no yona; 1502; 1162; 7680; 7597; 91369; 53915; 186039; 179430; 84.40; 77.78; 4996.40; 7377.93; 11274.90 amagami brilliant park; 1198; 888; 6351; 6377; 59968; 31252; 122344; 117303; 81.33; 80.10; 5177.90; 7111.07; 10218.85 bakemonogatari; 1266; 959; 9237; 9225; 86446; 47088; 180066; 170112; 84.67; 81.92; 3656.70; 5341.83; 8104.89 black lagoon; 1588; 1274; 12360; 12600; 111390; 60146; 235635; 226258; 81.43; 76.90; 8061.65; 10549.41; 13922.15 cardcaptor sakura; 1362; 1142; 32645; 32309; 261594; 126381; 553771; 463862; 84.87; 83.10; 2858.80; 4226.87; 7208.08 cowboy bebop; 1304; 1011; 9382; 9091; 90735; 45920; 185698; 171931; 76.54; 79.72; 6630.00; 8849.75; 12444.50 demi-chan wa kataritai; 1047; 767; 4183; 4267; 48318; 26910; 103967; 100954; 83.84; 81.66; 4106.70; 6229.09; 8807.85 devilman crybaby; 996; 647; 3193; 3554; 33912; 17285; 71529; 67021; 82.80; 79.68; 4833.00; 7020.07; 9810.00 eromanga sensei; 954; 702; 6708; 7178; 58105; 30433; 
122488; 115845; 84.37; 81.01; 3230.10; 4083.33; 6473.77 eureka seven; 1489; 1240; 24996; 25379; 204927; 101555; 432784; 388647; 79.36; 77.18; 5928.20; 8118.90; 12773.77 flying witch; 904; 643; 3860; 3659; 43099; 22243; 94274; 83625; 89.10; 87.87; 3239.30; 4607.98; 7005.65 fractale; 923; 620; 3935; 4350; 33674; 17390; 80309; 76703; 83.37; 79.03; 6212.90; 9612.67; 19677.45 fune wo amu; 1115; 789; 3236; 2993; 40960; 21690; 88702; 78438; 80.40; 76.44; 7507.08; 8533.38; 11445.65 gabriel dropout; 1099; 826; 7242; 7691; 64811; 31584; 135685; 129955; 81.79; 80.93; 3947.02; 5440.77; 7723.05 gekkan shoujo nozaki-kun; 1066; 752; 4173; 4194; 54221; 28293; 118960; 107660; 81.32; 79.60; 3069.50; 4370.44; 6167.25 girls last tour; 755; 544; 4693; 4928; 33432; 17596; 73472; 66510; 89.39; 85.81; 3250.72; 4560.73; 7207.57 gochiusa; 1186; 853; 4246; 4159; 50844; 27741; 103554; 99978; 74.60; 79.72; 4895.37; 6103.83; 9338.77 hyouka; 1419; 1167; 13490; 13460; 119434; 65924; 251850; 236702; 79.33; 78.84; 5232.25; 7490.29; 10491.42 ichigo marshmallow; 1001; 696; 4243; 4546; 46564; 23118; 99339; 96966; 89.79; 78.77; 4981.13; 7109.43; 11575.08 inu x boku; 1119; 815; 5691; 5721; 49772; 26497; 104383; 96575; 82.99; 80.46; 5529.62; 8158.12; 13355.38 jinrui; 1483; 1090; 5062; 4713; 60336; 33090; 129157; 118913; 76.14; 79.25; 6277.70; 8595.39; 11314.42 jojo; 1393; 1142; 14145; 14769; 117159; 61076; 251507; 235637; 79.54; 74.06; 6609.50; 8709.38; 12506.42 joukamachi no dandelion; 1109; 834; 7446; 7293; 69609; 34207; 142146; 126896; 86.91; 83.45; 3637.95; 5277.70; 6879.80 katanagatari; 1619; 1305; 9387; 9315; 129592; 79523; 271761; 266724; 89.57; 70.98; 7559.26; 9979.54; 13437.33 kekkai sensen; 1365; 968; 4303; 3905; 53119; 25736; 113985; 96591; 75.38; 75.41; 5852.10; 7864.79; 10638.52 kono bijutsubu; 969; 681; 6042; 6327; 52008; 25949; 108025; 100336; 85.86; 83.67; 3221.10; 4310.08; 6403.05 konosuba 1~2; 1407; 1096; 12806; 13362; 113059; 58053; 237141; 222757; 81.23; 78.73; 5907.50; 7618.40; 
10302.00 love lab; 1143; 871; 8080; 9114; 68736; 36349; 164909; 155865; 82.78; 81.62; 4764.10; 6742.03; 10281.62 lucky star; 1608; 1276; 11664; 11555; 156029; 79273; 339741; 311283; 83.25; 81.52; 5135.98; 7097.35; 10177.98 mahoutsukai no yome; 1222; 923; 9314; 9617; 76756; 41461; 163354; 150031; 87.73; 82.43; 3902.79; 5547.36; 9359.15 mawaru penguindrum; 1332; 1009; 10363; 10877; 105669; 52161; 220701; 196808; 82.64; 80.61; 4457.90; 6558.18; 11204.45 mikakunin; 942; 681; 6840; 7289; 59294; 31370; 125675; 118803; 85.11; 83.31; 3813.00; 5922.50; 8352.88 mob psycho 100; 1240; 944; 6886; 7473; 62249; 33280; 131039; 125729; 81.91; 79.12; 5373.73; 6906.25; 8762.80 nagi no asukara; 1297; 960; 9225; 8316; 96147; 50251; 211835; 174613; 88.20; 82.05; 3255.70; 5429.04; 8684.05 ngsrt airantou; 1326; 1021; 14463; 16011; 143071; 65751; 296831; 264531; 89.39; 88.14; 5178.65; 6941.24; 10090.22 nichijou; 1212; 933; 13498; 13719; 106440; 49991; 222473; 198180; 85.32; 81.27; 4847.90; 6716.46; 9128.35 no game no life; 1237; 910; 7015; 7192; 62792; 32958; 130894; 124833; 77.91; 76.32; 6516.25; 8330.75; 10701.43 non non biyori; 940; 706; 6149; 6435; 52003; 25933; 108793; 101528; 86.99; 83.99; 4902.70; 6443.73; 9160.85 noragami; 1179; 769; 4095; 3951; 38869; 22243; 83735; 77204; 89.62; 84.11; 4682.47; 6648.76; 10102.70 owari no seraph 1~2; 1125; 890; 10223; 10301; 85309; 44984; 180143; 162224; 82.66; 80.62; 3012.40; 4221.92; 6406.04 panty and stocking; 1236; 920; 7790; 8175; 67862; 31267; 141473; 130912; 80.34; 71.24; 8487.63; 10817.94; 14822.22 ping pong; 1023; 710; 4986; 4956; 41437; 20829; 86286; 79877; 81.17; 74.23; 7556.00; 10288.39; 13044.50 psycho pass; 1511; 1240; 10428; 10644; 98018; 51624; 208251; 199667; 69.10; 73.23; 7425.90; 9529.09; 12241.45 railgun 1~2; 1555; 1311; 27097; 27921; 222468; 114741; 475308; 428180; 79.15; 79.89; 5532.60; 7797.85; 12286.13 revolutionary girl utena; 1254; 1014; 15969; 15307; 140888; 72132; 294633; 256457; 82.08; 81.63; 3805.32; 5657.95; 9356.30 
saki; 1264; 953; 12185; 12709; 103296; 54765; 220784; 197145; 79.70; 75.43; 8722.55; 13377.31; 19694.04 samflam; 1337; 1085; 11742; 12740; 100867; 52146; 240007; 225486; 75.63; 77.73; 5103.90; 7084.65; 10432.10 samurai champloo; 1322; 1012; 10894; 11001; 85687; 44945; 180967; 164467; 87.90; 84.33; 4892.15; 6723.40; 10108.57 sayonara zetsubou sensei 1~2; 1678; 1317; 13135; 13455; 134776; 67299; 281372; 257118; 80.64; 78.35; 6796.40; 8884.02; 12616.85 scryed; 1416; 1108; 10629; 10525; 115015; 61643; 238704; 225606; 78.09; 79.19; 6526.25; 8489.38; 12431.25 shiki; 1315; 1003; 6165; 8866; 84126; 44718; 194106; 179941; 87.84; 79.09; 4369.86; 6172.93; 9597.00 shinsekai yori; 1478; 1195; 11269; 11641; 104381; 57416; 219180; 206791; 80.90; 79.00; 7362.45; 11438.01; 15436.58 sora no woto; 1154; 831; 3267; 3193; 37596; 20443; 78016; 75306; 65.26; 79.72; 5043.26; 6807.25; 9297.50 spice and wolf 1~2; 1569; 1246; 8956; 8242; 116815; 62982; 241291; 220041; 76.55; 79.19; 7652.07; 10108.36; 13708.37 sword art online; 1352; 1036; 10303; 10234; 85248; 47928; 177161; 159707; 76.63; 77.12; 4827.88; 6703.61; 10016.70 tamako market; 1182; 835; 5187; 4789; 55273; 27676; 112174; 99236; 85.56; 84.67; 5117.82; 6384.93; 8988.45 tatami galaxy; 1498; 1144; 6756; 6675; 68311; 36291; 139699; 132168; 76.89; 76.11; 8174.20; 10517.20; 13698.20 toradora; 1326; 1028; 15392; 16280; 129374; 65273; 278068; 258639; 85.23; 82.32; 4694.39; 6198.58; 9832.52 trigun; 1298; 1019; 12722; 12879; 106713; 53235; 223933; 201750; 85.57; 81.24; 4828.00; 6709.69; 9902.00 twintails; 1103; 794; 5754; 5910; 55914; 27898; 112637; 108944; 77.96; 78.22; 5156.31; 6231.15; 8659.62 uchouten kazoku; 1152; 871; 6248; 6305; 56662; 31314; 118300; 110614; 86.05; 81.21; 5906.30; 8583.70; 11922.56 violet evergarden; 1078; 810; 5926; 5758; 49632; 25780; 105498; 95119; 78.45; 78.78; 4778.62; 6299.58; 9743.05 youjo senki; 1330; 1023; 5352; 5625; 49531; 26785; 103556; 98936; 65.42; 61.48; 10688.20; 13342.30; 16955.44 zankyou no terror; 
1095; 782; 2979; 2636; 37006; 18453; 80014; 68142; 72.58; 73.40; 6710.55; 8903.15; 12950.51 }}} "custom metric b" is derived by: {{{#!python runlen_han = counts[0]/runs[0] runlen_hira = counts[1]/runs[1] runlen_kata = counts[2]/runs[2] avg_counts = sum(counts[:3])/3 prop_han = counts[0]/avg_counts prop_hira = counts[1]/avg_counts prop_kata = counts[2]/avg_counts avg_runs = sum(runs[:3])/3 prop_runs_han = runs[0]/avg_runs prop_runs_hira = runs[1]/avg_runs prop_runs_kata = runs[2]/avg_runs # .... properties = [ runlen_han, runlen_hira, runlen_kata, math.log(prop_han), math.log(prop_hira), math.log(prop_kata), math.log(prop_runs_han), math.log(prop_runs_hira), math.log(prop_runs_kata), ] weights = [ 1.6192223 , -0.59284526, -0.16966693, -2.30292 , 0.63782173, 0.62009275, 1.2680697 , -3.2421117 , -0.8129924 , 0.01331723 # constant term ] nothayashi = weights[-1] + sum(properties[i]*weights[i] for i in range(len(properties))) nothayashi = (1-nothayashi)*50+50 }}} where "counts" stores the number of characters belonging to each writing system, and "runs" stores the number of contiguous runs of each writing system; both are indexed 0 = han (kanji), 1 = hiragana, 2 = katakana, so counts[i]/runs[i] is the average run length for that writing system. The last entry of "weights" is the constant term; the other nine line up one-to-one with "properties". The weights were derived from multiple regression with keras. 
"""Dump the subtitle text of .srt files to stdout as UTF-8.

Usage: python <this script> FILE.srt [FILE.srt ...]

For every file, each cue's cleaned text lines are printed followed by a
blank line; one more blank line is printed at the end of each file.
"""
import sys
import re


def print_safe(string, end="\n"):
    """Write ``str(string) + end`` to stdout encoded as UTF-8.

    The bytes go straight to ``sys.stdout.buffer`` so the text is always
    emitted as UTF-8, whatever encoding the text-mode stdout would use.
    """
    sys.stdout.buffer.write((str(string)+end).encode("utf-8"))


# Substrings that are deleted outright from every subtitle line.
nullify = [
    "[テレビ]",
    "[スピーカ]",
    r"\n",
    r"\N",
    "\r",
]

# Parenthesised asides, in full-width and ASCII parentheses respectively.
_FULLWIDTH_PARENS = re.compile(r"（[^）]*）")
_ASCII_PARENS = re.compile(r"\([^\)]*\)")


def clean_line(line):
    """Return *line* with asides and noise removed, stripped of whitespace.

    Removes text inside full-width or ASCII parentheses, converts the
    double angle brackets 《 》 to « », deletes every ``nullify`` substring
    and finally strips surrounding whitespace.  May return ``""``.
    """
    line = _FULLWIDTH_PARENS.sub("", line)
    line = _ASCII_PARENS.sub("", line)
    line = line.replace("《", "«").replace("》", "»")
    for junk in nullify:
        line = line.replace(junk, "")
    return line.strip()


def srt_groups(text):
    """Yield the cleaned text lines of each cue in *text*.

    *text* is the full content of an .srt file.  Cue blocks are separated
    by blank lines; the first two lines of a block (cue index and timing
    line) are dropped.  A cue whose raw text equals that of the previously
    seen cue is skipped, and cues that clean down to nothing yield nothing.

    Yields:
        list of str: the non-empty cleaned lines of one cue.
    """
    last_group = ""
    for block in text.split("\n\n"):
        # Extra blank lines between cues leave a leading "\n" on the block,
        # which would shift the timing line into the text; strip it first.
        raw_lines = block.strip("\n").split("\n")[2:]
        raw_group = "\n".join(raw_lines)
        if raw_group == last_group:
            continue
        last_group = raw_group
        cleaned = [c for c in map(clean_line, raw_lines) if c]
        if cleaned:
            yield cleaned


def main(argv=None):
    """Dump every .srt file named in *argv* (default: ``sys.argv[1:]``)."""
    paths = sys.argv[1:] if argv is None else argv
    for path in paths:
        # utf-8-sig transparently drops a leading BOM if the file has one.
        with open(path, "r", encoding="utf-8-sig") as f:
            text = f.read()
        for group in srt_groups(text):
            for line in group:
                print_safe(line)
            print_safe("")
        print_safe("")


if __name__ == "__main__":
    main()
"""Dump the dialogue text of .ass subtitle files to stdout as UTF-8.

Usage: python <this script> FILE.ass [FILE.ass ...]

Each cleaned dialogue line is printed on its own line.
"""
import sys
import re


def print_safe(string, end="\n"):
    """Write ``str(string) + end`` to stdout encoded as UTF-8.

    The bytes go straight to ``sys.stdout.buffer`` so the text is always
    emitted as UTF-8, whatever encoding the text-mode stdout would use.
    """
    sys.stdout.buffer.write((str(string)+end).encode("utf-8"))


def parsecsv(string):
    """Unfinished CSV-parser stub; nothing in this script calls it.

    Always returns ``None``.  Dialogue lines are split with
    ``str.split(",", 9)`` instead, because the text field may itself
    contain commas.
    """
    return None


# Substrings that are deleted outright from every dialogue text.
nullify = [
    "[テレビ]",
    "[スピーカ]",
    r"\n",
    r"\N",
]

# Dialogue lines carrying any of these values in a non-text field are skipped.
_SKIPPED_FIELDS = frozenset(("人类_声明", "标题", "staff", "Opening", "Ending"))

_OVERRIDE_BLOCK = re.compile(r"\{[^\}]*\}")
_FULLWIDTH_PARENS = re.compile(r"（[^）]*）")
_ASCII_PARENS = re.compile(r"\([^\)]*\)")
# Drawing mode is switched on by \p followed by a non-zero digit.  A plain
# substring test for "\p" would also hit \pos(...) and \pbo, dropping every
# positioned line.
_DRAWING_MODE = re.compile(r"\\p[1-9]")


def extract_dialogue_text(line):
    """Return the cleaned text of one ``Dialogue:`` line, or ``None``.

    ``None`` means the line should not be dumped: it has fewer than ten
    comma-separated fields, one of its non-text fields is listed in
    ``_SKIPPED_FIELDS``, it enables drawing mode, it is a positioned line
    of at most one character (probably per-character karaoke or similar),
    or nothing is left after cleaning.
    """
    # Do not use a CSV parser here: only the first nine commas separate
    # fields, everything after them is the text.
    fields = line.replace("Dialogue:", "", 1).split(",", 9)
    if len(fields) < 10:
        return None
    if _SKIPPED_FIELDS.intersection(fields[:-1]):
        return None

    raw_text = fields[-1]
    # Isolating drawing instructions properly would need a real parser, so
    # any line that enables drawing mode is discarded as a whole.
    if _DRAWING_MODE.search(raw_text):
        return None

    text = _OVERRIDE_BLOCK.sub("", raw_text)
    text = _FULLWIDTH_PARENS.sub("", text)
    text = _ASCII_PARENS.sub("", text)
    text = text.replace("《", "«").replace("》", "»")
    for junk in nullify:
        text = text.replace(junk, "")
    # Strip last so whitespace exposed by the removals above is dropped too.
    text = text.strip()

    if len(text) <= 1 and "pos" in raw_text:
        return None
    return text or None


def dialogue_lines(lines):
    """Yield cleaned dialogue text from the raw *lines* of one .ass file.

    Only ``Dialogue:`` lines that come after the ``[Events]`` header are
    considered, and a text equal to the previously yielded one is skipped.
    """
    in_events = False
    last_line = ""
    for raw in lines:
        raw = raw.strip("\n")
        if raw == "[Events]":
            in_events = True
            continue
        if not (in_events and raw.startswith("Dialogue:")):
            continue
        text = extract_dialogue_text(raw)
        if text is None or text == last_line:
            continue
        last_line = text
        yield text


def main(argv=None):
    """Dump every .ass file named in *argv* (default: ``sys.argv[1:]``)."""
    paths = sys.argv[1:] if argv is None else argv
    for path in paths:
        # utf-8-sig transparently drops a leading BOM if the file has one.
        with open(path, "r", encoding="utf-8-sig") as f:
            for text in dialogue_lines(f):
                print_safe(text)


if __name__ == "__main__":
    main()