Created
February 8, 2017 00:44
-
-
Save nyarla/d88917bf19b65143f9fc4107e20ba3df to your computer and use it in GitHub Desktop.
A reverse port to modern JavaScript of the optimized TinySegmenter implementation written in Julia.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* TinySegmenter.optmized.js | |
| * ========================= | |
| * | |
| * * A reverse port to modern JavaScript of the optimized TinySegmenter implementation written in Julia. | |
| * | |
| * LICENSE | |
| * ======= | |
| * (c) 2008 Taku Kudo <[email protected]> | |
| * (c) 2015 Michiaki Ariga a.k.a chezou <[email protected]> | |
| * (c) 2017 Naoki OKAMURA a.k.a nyarla <[email protected]> | |
| * | |
| * All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions are met: | |
| * | |
| * * Redistributions of source code must retain the above copyright notice, | |
| * this list of conditions and the following disclaimer. | |
| * * Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in the | |
| * documentation and/or other materials provided with the distribution. | |
| * * Neither the name of the <ORGANIZATION> nor the names of its | |
| * contributors may be used to endorse or promote products derived from this | |
| * software without specific prior written permission. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
| * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * */ | |
| "use strict"; | |
/*
 * BIAS: constant integer offset.
 * NOTE(review): presumably the starting score to which feature weights are
 * added; the scoring code is outside this chunk — confirm.
 */
var BIAS = -332;
/*
 * BC1: integer weights keyed by two-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights over pairs of
 * character-class codes; the code that reads this table is outside this
 * chunk — confirm before editing values.
 */
var BC1 = new Map([
  ["HH", 6],
  ["II", 2461],
  ["KH", 406],
  ["OH", -1378],
]);
/*
 * BC2: integer weights keyed by two-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights over pairs of
 * character-class codes; the code that reads this table is outside this
 * chunk — confirm before editing values.
 */
var BC2 = new Map([
  ["AA", -3267],
  ["AI", 2744],
  ["AN", -878],
  ["HH", -4070],
  ["HM", -1711],
  ["HN", 4012],
  ["HO", 3761],
  ["IA", 1327],
  ["IH", -1184],
  ["II", -1332],
  ["IK", 1721],
  ["IO", 5492],
  ["KI", 3831],
  ["KK", -8741],
  ["MH", -3132],
  ["MK", 3334],
  ["OO", -2920],
]);
/*
 * BC3: integer weights keyed by two-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights over pairs of
 * character-class codes; the code that reads this table is outside this
 * chunk — confirm before editing values.
 */
var BC3 = new Map([
  ["HH", 996],
  ["HI", 626],
  ["HK", -721],
  ["HN", -1307],
  ["HO", -836],
  ["IH", -301],
  ["KK", 2762],
  ["MK", 1079],
  ["MM", 4034],
  ["OA", -1652],
  ["OH", 266],
]);
/*
 * BP1: integer weights keyed by two-letter strings over B, O and U.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var BP1 = new Map([
  ["BB", 295],
  ["OB", 304],
  ["OO", -125],
  ["UB", 352],
]);
/*
 * BP2: integer weights keyed by two-letter strings over B and O.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var BP2 = new Map([
  ["BO", 60],
  ["OO", -1762],
]);
/*
 * BQ1: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var BQ1 = new Map([
  ["BHH", 1150],
  ["BHM", 1521],
  ["BII", -1158],
  ["BIM", 886],
  ["BMH", 1208],
  ["BNH", 449],
  ["BOH", -91],
  ["BOO", -2597],
  ["OHI", 451],
  ["OIH", -296],
  ["OKA", 1851],
  ["OKH", -1020],
  ["OKK", 904],
  ["OOO", 2965],
]);
/*
 * BQ2: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var BQ2 = new Map([
  ["BHH", 118],
  ["BHI", -1159],
  ["BHM", 466],
  ["BIH", -919],
  ["BKK", -1720],
  ["BKO", 864],
  ["OHH", -1139],
  ["OHM", -181],
  ["OIH", 153],
  ["UHI", -1146],
]);
/*
 * BQ3: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var BQ3 = new Map([
  ["BHH", -792],
  ["BHI", 2664],
  ["BII", -299],
  ["BKI", 419],
  ["BMH", 937],
  ["BMM", 8335],
  ["BNN", 998],
  ["BOH", 775],
  ["OHH", 2174],
  ["OHM", 439],
  ["OII", 280],
  ["OKH", 1798],
  ["OKI", -793],
  ["OKO", -2242],
  ["OMH", -2402],
  ["OOO", 11699],
]);
/*
 * BQ4: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var BQ4 = new Map([
  ["BHH", -3895],
  ["BIH", 3761],
  ["BII", -4654],
  ["BIK", 1348],
  ["BKK", -1806],
  ["BMI", -3385],
  ["BOO", -12396],
  ["OAH", 926],
  ["OHH", 266],
  ["OHK", -2036],
  ["ONN", -973],
]);
/*
 * BW1: integer weights keyed by short literal strings (mostly two
 * characters, plus "B1あ" and "B1同").
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "B1あ", "B1同" and "」と" a second time at
 * the end with identical weights (possibly width variants collapsed by
 * Unicode normalization — confirm against upstream). A Map keeps one entry
 * per key, so the repeats are omitted here.
 */
var BW1 = new Map([
  [",と", 660],
  [",同", 727],
  ["B1あ", 1404],
  ["B1同", 542],
  ["、と", 660],
  ["、同", 727],
  ["」と", 1682],
  ["あっ", 1505],
  ["いう", 1743],
  ["いっ", -2055],
  ["いる", 672],
  ["うし", -4817],
  ["うん", 665],
  ["から", 3472],
  ["がら", 600],
  ["こう", -790],
  ["こと", 2083],
  ["こん", -1262],
  ["さら", -4143],
  ["さん", 4573],
  ["した", 2641],
  ["して", 1104],
  ["すで", -3399],
  ["そこ", 1977],
  ["それ", -871],
  ["たち", 1122],
  ["ため", 601],
  ["った", 3463],
  ["つい", -802],
  ["てい", 805],
  ["てき", 1249],
  ["でき", 1127],
  ["です", 3445],
  ["では", 844],
  ["とい", -4915],
  ["とみ", 1922],
  ["どこ", 3887],
  ["ない", 5713],
  ["なっ", 3015],
  ["など", 7379],
  ["なん", -1113],
  ["にし", 2468],
  ["には", 1498],
  ["にも", 1671],
  ["に対", -912],
  ["の一", -501],
  ["の中", 741],
  ["ませ", 2448],
  ["まで", 1711],
  ["まま", 2600],
  ["まる", -2155],
  ["やむ", -1947],
  ["よっ", -2565],
  ["れた", 2369],
  ["れで", -913],
  ["をし", 1860],
  ["を見", 731],
  ["亡く", -1886],
  ["京都", 2558],
  ["取り", -2784],
  ["大き", -2604],
  ["大阪", 1497],
  ["平方", -2314],
  ["引き", -1336],
  ["日本", -195],
  ["本当", -2423],
  ["毎日", -2113],
  ["目指", -724],
]);
/*
 * BW2: integer weights keyed by two-character literal strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "11" a second time at the end with an
 * identical weight (possibly a width variant collapsed by Unicode
 * normalization — confirm against upstream). A Map keeps one entry per key,
 * so the repeat is omitted here.
 */
var BW2 = new Map([
  ["..", -11822],
  ["11", -669],
  ["――", -5730],
  ["−−", -13175],
  ["いう", -1609],
  ["うか", 2490],
  ["かし", -1350],
  ["かも", -602],
  ["から", -7194],
  ["かれ", 4612],
  ["がい", 853],
  ["がら", -3198],
  ["きた", 1941],
  ["くな", -1597],
  ["こと", -8392],
  ["この", -4193],
  ["させ", 4533],
  ["され", 13168],
  ["さん", -3977],
  ["しい", -1819],
  ["しか", -545],
  ["した", 5078],
  ["して", 972],
  ["しな", 939],
  ["その", -3744],
  ["たい", -1253],
  ["たた", -662],
  ["ただ", -3857],
  ["たち", -786],
  ["たと", 1224],
  ["たは", -939],
  ["った", 4589],
  ["って", 1647],
  ["っと", -2094],
  ["てい", 6144],
  ["てき", 3640],
  ["てく", 2551],
  ["ては", -3110],
  ["ても", -3065],
  ["でい", 2666],
  ["でき", -1528],
  ["でし", -3828],
  ["です", -4761],
  ["でも", -4203],
  ["とい", 1890],
  ["とこ", -1746],
  ["とと", -2279],
  ["との", 720],
  ["とみ", 5168],
  ["とも", -3941],
  ["ない", -2488],
  ["なが", -1313],
  ["など", -6509],
  ["なの", 2614],
  ["なん", 3099],
  ["にお", -1615],
  ["にし", 2748],
  ["にな", 2454],
  ["によ", -7236],
  ["に対", -14943],
  ["に従", -4688],
  ["に関", -11388],
  ["のか", 2093],
  ["ので", -7059],
  ["のに", -6041],
  ["のの", -6125],
  ["はい", 1073],
  ["はが", -1033],
  ["はず", -2532],
  ["ばれ", 1813],
  ["まし", -1316],
  ["まで", -6621],
  ["まれ", 5409],
  ["めて", -3153],
  ["もい", 2230],
  ["もの", -10713],
  ["らか", -944],
  ["らし", -1611],
  ["らに", -1897],
  ["りし", 651],
  ["りま", 1620],
  ["れた", 4270],
  ["れて", 849],
  ["れば", 4114],
  ["ろう", 6067],
  ["われ", 7901],
  ["を通", -11877],
  ["んだ", 728],
  ["んな", -4115],
  ["一人", 602],
  ["一方", -1375],
  ["一日", 970],
  ["一部", -1051],
  ["上が", -4479],
  ["会社", -1116],
  ["出て", 2163],
  ["分の", -7758],
  ["同党", 970],
  ["同日", -913],
  ["大阪", -2471],
  ["委員", -1250],
  ["少な", -1050],
  ["年度", -8669],
  ["年間", -1626],
  ["府県", -2363],
  ["手権", -1982],
  ["新聞", -4066],
  ["日新", -722],
  ["日本", -7068],
  ["日米", 3372],
  ["曜日", -601],
  ["朝鮮", -2355],
  ["本人", -2697],
  ["東京", -1543],
  ["然と", -1384],
  ["社会", -1276],
  ["立て", -990],
  ["第に", -1612],
  ["米国", -4268],
]);
/*
 * BW3: integer weights keyed by short literal strings (two characters,
 * except the single-character key "市").
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "カ月" a second time at the end with an
 * identical weight (possibly a width variant collapsed by Unicode
 * normalization — confirm against upstream). A Map keeps one entry per key,
 * so the repeat is omitted here.
 */
var BW3 = new Map([
  ["あた", -2194],
  ["あり", 719],
  ["ある", 3846],
  ["い.", -1185],
  ["い。", -1185],
  ["いい", 5308],
  ["いえ", 2079],
  ["いく", 3029],
  ["いた", 2056],
  ["いっ", 1883],
  ["いる", 5600],
  ["いわ", 1527],
  ["うち", 1117],
  ["うと", 4798],
  ["えと", 1454],
  ["か.", 2857],
  ["か。", 2857],
  ["かけ", -743],
  ["かっ", -4098],
  ["かに", -669],
  ["から", 6520],
  ["かり", -2670],
  ["が,", 1816],
  ["が、", 1816],
  ["がき", -4855],
  ["がけ", -1127],
  ["がっ", -913],
  ["がら", -4977],
  ["がり", -2064],
  ["きた", 1645],
  ["けど", 1374],
  ["こと", 7397],
  ["この", 1542],
  ["ころ", -2757],
  ["さい", -714],
  ["さを", 976],
  ["し,", 1557],
  ["し、", 1557],
  ["しい", -3714],
  ["した", 3562],
  ["して", 1449],
  ["しな", 2608],
  ["しま", 1200],
  ["す.", -1310],
  ["す。", -1310],
  ["する", 6521],
  ["ず,", 3426],
  ["ず、", 3426],
  ["ずに", 841],
  ["そう", 428],
  ["た.", 8875],
  ["た。", 8875],
  ["たい", -594],
  ["たの", 812],
  ["たり", -1183],
  ["たる", -853],
  ["だ.", 4098],
  ["だ。", 4098],
  ["だっ", 1004],
  ["った", -4748],
  ["って", 300],
  ["てい", 6240],
  ["てお", 855],
  ["ても", 302],
  ["です", 1437],
  ["でに", -1482],
  ["では", 2295],
  ["とう", -1387],
  ["とし", 2266],
  ["との", 541],
  ["とも", -3543],
  ["どう", 4664],
  ["ない", 1796],
  ["なく", -903],
  ["など", 2135],
  ["に,", -1021],
  ["に、", -1021],
  ["にし", 1771],
  ["にな", 1906],
  ["には", 2644],
  ["の,", -724],
  ["の、", -724],
  ["の子", -1000],
  ["は,", 1337],
  ["は、", 1337],
  ["べき", 2181],
  ["まし", 1113],
  ["ます", 6943],
  ["まっ", -1549],
  ["まで", 6154],
  ["まれ", -793],
  ["らし", 1479],
  ["られ", 6820],
  ["るる", 3818],
  ["れ,", 854],
  ["れ、", 854],
  ["れた", 1850],
  ["れて", 1375],
  ["れば", -3246],
  ["れる", 1091],
  ["われ", -605],
  ["んだ", 606],
  ["んで", 798],
  ["カ月", 990],
  ["会議", 860],
  ["入り", 1232],
  ["大会", 2217],
  ["始め", 1681],
  ["市", 965],
  ["新聞", -5055],
  ["日,", 974],
  ["日、", 974],
  ["社会", 2024],
]);
/*
 * TC1: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights over triples of
 * character-class codes; the code that reads this table is outside this
 * chunk — confirm before editing values.
 */
var TC1 = new Map([
  ["AAA", 1093],
  ["HHH", 1029],
  ["HHM", 580],
  ["HII", 998],
  ["HOH", -390],
  ["HOM", -331],
  ["IHI", 1169],
  ["IOH", -142],
  ["IOI", -1015],
  ["IOM", 467],
  ["MMH", 187],
  ["OOI", -1832],
]);
/*
 * TC2: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights over triples of
 * character-class codes; the code that reads this table is outside this
 * chunk — confirm before editing values.
 */
var TC2 = new Map([
  ["HHO", 2088],
  ["HII", -1023],
  ["HMM", -1154],
  ["IHI", -1965],
  ["KKH", 703],
  ["OII", -2649],
]);
/*
 * TC3: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights over triples of
 * character-class codes; the code that reads this table is outside this
 * chunk — confirm before editing values.
 */
var TC3 = new Map([
  ["AAA", -294],
  ["HHH", 346],
  ["HHI", -341],
  ["HII", -1088],
  ["HIK", 731],
  ["HOH", -1486],
  ["IHH", 128],
  ["IHI", -3041],
  ["IHO", -1935],
  ["IIH", -825],
  ["IIM", -1035],
  ["IOI", -542],
  ["KHH", -1216],
  ["KKA", 491],
  ["KKH", -1217],
  ["KOK", -1009],
  ["MHH", -2694],
  ["MHM", -457],
  ["MHO", 123],
  ["MMH", -471],
  ["NNH", -1689],
  ["NNO", 662],
  ["OHO", -3393],
]);
/*
 * TC4: integer weights keyed by three-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights over triples of
 * character-class codes; the code that reads this table is outside this
 * chunk — confirm before editing values.
 */
var TC4 = new Map([
  ["HHH", -203],
  ["HHI", 1344],
  ["HHK", 365],
  ["HHM", -122],
  ["HHN", 182],
  ["HHO", 669],
  ["HIH", 804],
  ["HII", 679],
  ["HOH", 446],
  ["IHH", 695],
  ["IHO", -2324],
  ["IIH", 321],
  ["III", 1497],
  ["IIO", 656],
  ["IOO", 54],
  ["KAK", 4845],
  ["KKA", 3386],
  ["KKK", 3065],
  ["MHH", -405],
  ["MHI", 201],
  ["MMH", -241],
  ["MMM", 661],
  ["MOM", 841],
]);
/*
 * TQ1: integer weights keyed by four-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TQ1 = new Map([
  ["BHHH", -227],
  ["BHHI", 316],
  ["BHIH", -132],
  ["BIHH", 60],
  ["BIII", 1595],
  ["BNHH", -744],
  ["BOHH", 225],
  ["BOOO", -908],
  ["OAKK", 482],
  ["OHHH", 281],
  ["OHIH", 249],
  ["OIHI", 200],
  ["OIIH", -68],
]);
/*
 * TQ2: integer weights keyed by four-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TQ2 = new Map([
  ["BIHH", -1401],
  ["BIII", -1033],
  ["BKAK", -543],
  ["BOOO", -5591],
]);
/*
 * TQ3: integer weights keyed by four-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TQ3 = new Map([
  ["BHHH", 478],
  ["BHHM", -1073],
  ["BHIH", 222],
  ["BHII", -504],
  ["BIIH", -116],
  ["BIII", -105],
  ["BMHI", -863],
  ["BMHM", -464],
  ["BOMH", 620],
  ["OHHH", 346],
  ["OHHI", 1729],
  ["OHII", 997],
  ["OHMH", 481],
  ["OIHH", 623],
  ["OIIH", 1344],
  ["OKAK", 2792],
  ["OKHH", 587],
  ["OKKA", 679],
  ["OOHH", 110],
  ["OOII", -685],
]);
/*
 * TQ4: integer weights keyed by four-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TQ4 = new Map([
  ["BHHH", -721],
  ["BHHM", -3604],
  ["BHII", -966],
  ["BIIH", -607],
  ["BIII", -2181],
  ["OAAA", -2763],
  ["OAKK", 180],
  ["OHHH", -294],
  ["OHHI", 2446],
  ["OHHO", 480],
  ["OHIH", -1573],
  ["OIHH", 1935],
  ["OIHI", -493],
  ["OIIH", 626],
  ["OIII", -4007],
  ["OKAK", -8156],
]);
/*
 * TW1: integer weights keyed by three-character literal strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TW1 = new Map([
  ["につい", -4681],
  ["東京都", 2026],
]);
/*
 * TW2: integer weights keyed by three-character literal strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TW2 = new Map([
  ["ある程", -2049],
  ["いった", -1256],
  ["ころが", -2434],
  ["しょう", 3873],
  ["その後", -4430],
  ["だって", -1049],
  ["ていた", 1833],
  ["として", -4657],
  ["ともに", -4517],
  ["もので", 1882],
  ["一気に", -792],
  ["初めて", -1512],
  ["同時に", -8097],
  ["大きな", -1255],
  ["対して", -2721],
  ["社会党", -3216],
]);
/*
 * TW3: integer weights keyed by three-character literal strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TW3 = new Map([
  ["いただ", -1734],
  ["してい", 1314],
  ["として", -4314],
  ["につい", -5483],
  ["にとっ", -5989],
  ["に当た", -6247],
  ["ので,", -727],
  ["ので、", -727],
  ["のもの", -600],
  ["れから", -3752],
  ["十二月", -2287],
]);
/*
 * TW4: integer weights keyed by three-character literal strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var TW4 = new Map([
  ["いう.", 8576],
  ["いう。", 8576],
  ["からな", -2348],
  ["してい", 2958],
  ["たが,", 1516],
  ["たが、", 1516],
  ["ている", 1538],
  ["という", 1349],
  ["ました", 5543],
  ["ません", 1097],
  ["ようと", -4258],
  ["よると", 5865],
]);
/*
 * UC1: integer weights keyed by single letters.
 * NOTE(review): presumably TinySegmenter feature weights per character-class
 * code; the code that reads this table is outside this chunk — confirm
 * before editing values.
 */
var UC1 = new Map([
  ["A", 484],
  ["K", 93],
  ["M", 645],
  ["O", -505],
]);
/*
 * UC2: integer weights keyed by single letters.
 * NOTE(review): presumably TinySegmenter feature weights per character-class
 * code; the code that reads this table is outside this chunk — confirm
 * before editing values.
 */
var UC2 = new Map([
  ["A", 819],
  ["H", 1059],
  ["I", 409],
  ["M", 3987],
  ["N", 5775],
  ["O", 646],
]);
/*
 * UC3: integer weights keyed by single letters.
 * NOTE(review): presumably TinySegmenter feature weights per character-class
 * code; the code that reads this table is outside this chunk — confirm
 * before editing values.
 */
var UC3 = new Map([
  ["A", -1370],
  ["I", 2311],
]);
/*
 * UC4: integer weights keyed by single letters.
 * NOTE(review): presumably TinySegmenter feature weights per character-class
 * code; the code that reads this table is outside this chunk — confirm
 * before editing values.
 */
var UC4 = new Map([
  ["A", -2643],
  ["H", 1809],
  ["I", -1032],
  ["K", -3450],
  ["M", 3565],
  ["N", 3876],
  ["O", 6646],
]);
/*
 * UC5: integer weights keyed by single letters.
 * NOTE(review): presumably TinySegmenter feature weights per character-class
 * code; the code that reads this table is outside this chunk — confirm
 * before editing values.
 */
var UC5 = new Map([
  ["H", 313],
  ["I", -1238],
  ["K", -799],
  ["M", 539],
  ["O", -831],
]);
/*
 * UC6: integer weights keyed by single letters.
 * NOTE(review): presumably TinySegmenter feature weights per character-class
 * code; the code that reads this table is outside this chunk — confirm
 * before editing values.
 */
var UC6 = new Map([
  ["H", -506],
  ["I", -253],
  ["K", 87],
  ["M", 247],
  ["O", -387],
]);
/*
 * UP1: integer weight table with the single key "O".
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var UP1 = new Map([
  ["O", -214],
]);
/*
 * UP2: integer weights keyed by the single letters "B" and "O".
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var UP2 = new Map([
  ["B", 69],
  ["O", 935],
]);
/*
 * UP3: integer weight table with the single key "B".
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var UP3 = new Map([
  ["B", 189],
]);
/*
 * UQ1: integer weights keyed by two-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var UQ1 = new Map([
  ["BH", 21],
  ["BI", -12],
  ["BK", -99],
  ["BN", 142],
  ["BO", -56],
  ["OH", -95],
  ["OI", 477],
  ["OK", 410],
  ["OO", -2422],
]);
/*
 * UQ2: integer weights keyed by two-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var UQ2 = new Map([
  ["BH", 216],
  ["BI", 113],
  ["OK", 1759],
]);
/*
 * UQ3: integer weights keyed by two-letter strings.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 */
var UQ3 = new Map([
  ["BA", -479],
  ["BH", 42],
  ["BI", 1913],
  ["BK", -7198],
  ["BM", 3160],
  ["BN", 6427],
  ["BO", 14761],
  ["OI", -827],
  ["ON", -3212],
]);
/*
 * UW1: integer weights keyed by single characters.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "「" and "・" a second time at the end with
 * identical weights (possibly width variants collapsed by Unicode
 * normalization — confirm against upstream). A Map keeps one entry per key,
 * so the repeats are omitted here.
 */
var UW1 = new Map([
  [",", 156],
  ["、", 156],
  ["「", -463],
  ["あ", -941],
  ["う", -127],
  ["が", -553],
  ["き", 121],
  ["こ", 505],
  ["で", -201],
  ["と", -547],
  ["ど", -123],
  ["に", -789],
  ["の", -185],
  ["は", -847],
  ["も", -466],
  ["や", -470],
  ["よ", 182],
  ["ら", -292],
  ["り", 208],
  ["れ", 169],
  ["を", -446],
  ["ん", -137],
  ["・", -135],
  ["主", -402],
  ["京", -268],
  ["区", -912],
  ["午", 871],
  ["国", -460],
  ["大", 561],
  ["委", 729],
  ["市", -411],
  ["日", -141],
  ["理", 361],
  ["生", -408],
  ["県", -386],
  ["都", -718],
]);
/*
 * UW2: integer weights keyed by single characters.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "「", "」", "ッ", "ア", "カ" and "キ" a second
 * time at the end with identical weights (possibly width variants collapsed
 * by Unicode normalization — confirm against upstream). A Map keeps one
 * entry per key, so the repeats are omitted here.
 */
var UW2 = new Map([
  [",", -829],
  ["、", -829],
  ["〇", 892],
  ["「", -645],
  ["」", 3145],
  ["あ", -538],
  ["い", 505],
  ["う", 134],
  ["お", -502],
  ["か", 1454],
  ["が", -856],
  ["く", -412],
  ["こ", 1141],
  ["さ", 878],
  ["ざ", 540],
  ["し", 1529],
  ["す", -675],
  ["せ", 300],
  ["そ", -1011],
  ["た", 188],
  ["だ", 1837],
  ["つ", -949],
  ["て", -291],
  ["で", -268],
  ["と", -981],
  ["ど", 1273],
  ["な", 1063],
  ["に", -1764],
  ["の", 130],
  ["は", -409],
  ["ひ", -1273],
  ["べ", 1261],
  ["ま", 600],
  ["も", -1263],
  ["や", -402],
  ["よ", 1639],
  ["り", -579],
  ["る", -694],
  ["れ", 571],
  ["を", -2516],
  ["ん", 2095],
  ["ア", -587],
  ["カ", 306],
  ["キ", 568],
  ["ッ", 831],
  ["三", -758],
  ["不", -2150],
  ["世", -302],
  ["中", -968],
  ["主", -861],
  ["事", 492],
  ["人", -123],
  ["会", 978],
  ["保", 362],
  ["入", 548],
  ["初", -3025],
  ["副", -1566],
  ["北", -3414],
  ["区", -422],
  ["大", -1769],
  ["天", -865],
  ["太", -483],
  ["子", -1519],
  ["学", 760],
  ["実", 1023],
  ["小", -2009],
  ["市", -813],
  ["年", -1060],
  ["強", 1067],
  ["手", -1519],
  ["揺", -1033],
  ["政", 1522],
  ["文", -1355],
  ["新", -1682],
  ["日", -1815],
  ["明", -1462],
  ["最", -630],
  ["朝", -1843],
  ["本", -1650],
  ["東", -931],
  ["果", -665],
  ["次", -2378],
  ["民", -180],
  ["気", -1740],
  ["理", 752],
  ["発", 529],
  ["目", -1584],
  ["相", -242],
  ["県", -1165],
  ["立", -763],
  ["第", 810],
  ["米", 509],
  ["自", -1353],
  ["行", 838],
  ["西", -744],
  ["見", -3874],
  ["調", 1010],
  ["議", 1198],
  ["込", 3041],
  ["開", 1758],
  ["間", -1257],
]);
/*
 * UW3: integer weights keyed by single characters.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "1", "」", "・", "ッ", "ア", "グ", "ス", "ト",
 * "ム", "ル", "ロ" and "ン" a second time at the end with identical weights
 * (possibly width variants collapsed by Unicode normalization — confirm
 * against upstream). A Map keeps one entry per key, so the repeats are
 * omitted here.
 */
var UW3 = new Map([
  [",", 4889],
  ["1", -800],
  ["−", -1723],
  ["、", 4889],
  ["々", -2311],
  ["〇", 5827],
  ["」", 2670],
  ["〓", -3573],
  ["あ", -2696],
  ["い", 1006],
  ["う", 2342],
  ["え", 1983],
  ["お", -4864],
  ["か", -1163],
  ["が", 3271],
  ["く", 1004],
  ["け", 388],
  ["げ", 401],
  ["こ", -3552],
  ["ご", -3116],
  ["さ", -1058],
  ["し", -395],
  ["す", 584],
  ["せ", 3685],
  ["そ", -5228],
  ["た", 842],
  ["ち", -521],
  ["っ", -1444],
  ["つ", -1081],
  ["て", 6167],
  ["で", 2318],
  ["と", 1691],
  ["ど", -899],
  ["な", -2788],
  ["に", 2745],
  ["の", 4056],
  ["は", 4555],
  ["ひ", -2171],
  ["ふ", -1798],
  ["へ", 1199],
  ["ほ", -5516],
  ["ま", -4384],
  ["み", -120],
  ["め", 1205],
  ["も", 2323],
  ["や", -788],
  ["よ", -202],
  ["ら", 727],
  ["り", 649],
  ["る", 5905],
  ["れ", 2773],
  ["わ", -1207],
  ["を", 6620],
  ["ん", -518],
  ["ア", 551],
  ["グ", 1319],
  ["ス", 874],
  ["ッ", -1350],
  ["ト", 521],
  ["ム", 1109],
  ["ル", 1591],
  ["ロ", 2201],
  ["ン", 278],
  ["・", -3794],
  ["一", -1619],
  ["下", -1759],
  ["世", -2087],
  ["両", 3815],
  ["中", 653],
  ["主", -758],
  ["予", -1193],
  ["二", 974],
  ["人", 2742],
  ["今", 792],
  ["他", 1889],
  ["以", -1368],
  ["低", 811],
  ["何", 4265],
  ["作", -361],
  ["保", -2439],
  ["元", 4858],
  ["党", 3593],
  ["全", 1574],
  ["公", -3030],
  ["六", 755],
  ["共", -1880],
  ["円", 5807],
  ["再", 3095],
  ["分", 457],
  ["初", 2475],
  ["別", 1129],
  ["前", 2286],
  ["副", 4437],
  ["力", 365],
  ["動", -949],
  ["務", -1872],
  ["化", 1327],
  ["北", -1038],
  ["区", 4646],
  ["千", -2309],
  ["午", -783],
  ["協", -1006],
  ["口", 483],
  ["右", 1233],
  ["各", 3588],
  ["合", -241],
  ["同", 3906],
  ["和", -837],
  ["員", 4513],
  ["国", 642],
  ["型", 1389],
  ["場", 1219],
  ["外", -241],
  ["妻", 2016],
  ["学", -1356],
  ["安", -423],
  ["実", -1008],
  ["家", 1078],
  ["小", -513],
  ["少", -3102],
  ["州", 1155],
  ["市", 3197],
  ["平", -1804],
  ["年", 2416],
  ["広", -1030],
  ["府", 1605],
  ["度", 1452],
  ["建", -2352],
  ["当", -3885],
  ["得", 1905],
  ["思", -1291],
  ["性", 1822],
  ["戸", -488],
  ["指", -3973],
  ["政", -2013],
  ["教", -1479],
  ["数", 3222],
  ["文", -1489],
  ["新", 1764],
  ["日", 2099],
  ["旧", 5792],
  ["昨", -661],
  ["時", -1248],
  ["曜", -951],
  ["最", -937],
  ["月", 4125],
  ["期", 360],
  ["李", 3094],
  ["村", 364],
  ["東", -805],
  ["核", 5156],
  ["森", 2438],
  ["業", 484],
  ["氏", 2613],
  ["民", -1694],
  ["決", -1073],
  ["法", 1868],
  ["海", -495],
  ["無", 979],
  ["物", 461],
  ["特", -3850],
  ["生", -273],
  ["用", 914],
  ["町", 1215],
  ["的", 7313],
  ["直", -1835],
  ["省", 792],
  ["県", 6293],
  ["知", -1528],
  ["私", 4231],
  ["税", 401],
  ["立", -960],
  ["第", 1201],
  ["米", 7767],
  ["系", 3066],
  ["約", 3663],
  ["級", 1384],
  ["統", -4229],
  ["総", 1163],
  ["線", 1255],
  ["者", 6457],
  ["能", 725],
  ["自", -2869],
  ["英", 785],
  ["見", 1044],
  ["調", -562],
  ["財", -733],
  ["費", 1777],
  ["車", 1835],
  ["軍", 1375],
  ["込", -1504],
  ["通", -1136],
  ["選", -681],
  ["郎", 1026],
  ["郡", 4404],
  ["部", 1200],
  ["金", 2163],
  ["長", 421],
  ["開", -1432],
  ["間", 1302],
  ["関", -1282],
  ["雨", 2009],
  ["電", -1045],
  ["非", 2066],
  ["駅", 1620],
]);
/*
 * UW4: integer weights keyed by single characters.
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "「", "」", "・", "ッ", "ー", "カ", "コ", "セ",
 * "ト", "メ", "ラ", "リ", "ル" and "ン" a second time at the end with identical
 * weights (possibly width variants collapsed by Unicode normalization —
 * confirm against upstream). A Map keeps one entry per key, so the repeats
 * are omitted here.
 */
var UW4 = new Map([
  [",", 3930],
  [".", 3508],
  ["―", -4841],
  ["、", 3930],
  ["。", 3508],
  ["〇", 4999],
  ["「", 1895],
  ["」", 3798],
  ["〓", -5156],
  ["あ", 4752],
  ["い", -3435],
  ["う", -640],
  ["え", -2514],
  ["お", 2405],
  ["か", 530],
  ["が", 6006],
  ["き", -4482],
  ["ぎ", -3821],
  ["く", -3788],
  ["け", -4376],
  ["げ", -4734],
  ["こ", 2255],
  ["ご", 1979],
  ["さ", 2864],
  ["し", -843],
  ["じ", -2506],
  ["す", -731],
  ["ず", 1251],
  ["せ", 181],
  ["そ", 4091],
  ["た", 5034],
  ["だ", 5408],
  ["ち", -3654],
  ["っ", -5882],
  ["つ", -1659],
  ["て", 3994],
  ["で", 7410],
  ["と", 4547],
  ["な", 5433],
  ["に", 6499],
  ["ぬ", 1853],
  ["ね", 1413],
  ["の", 7396],
  ["は", 8578],
  ["ば", 1940],
  ["ひ", 4249],
  ["び", -4134],
  ["ふ", 1345],
  ["へ", 6665],
  ["べ", -744],
  ["ほ", 1464],
  ["ま", 1051],
  ["み", -2082],
  ["む", -882],
  ["め", -5046],
  ["も", 4169],
  ["ゃ", -2666],
  ["や", 2795],
  ["ょ", -1544],
  ["よ", 3351],
  ["ら", -2922],
  ["り", -9726],
  ["る", -14896],
  ["れ", -2613],
  ["ろ", -4570],
  ["わ", -1783],
  ["を", 13150],
  ["ん", -2352],
  ["カ", 2145],
  ["コ", 1789],
  ["セ", 1287],
  ["ッ", -724],
  ["ト", -403],
  ["メ", -1635],
  ["ラ", -881],
  ["リ", -541],
  ["ル", -856],
  ["ン", -3637],
  ["・", -4371],
  ["ー", -11870],
  ["一", -2069],
  ["中", 2210],
  ["予", 782],
  ["事", -190],
  ["井", -1768],
  ["人", 1036],
  ["以", 544],
  ["会", 950],
  ["体", -1286],
  ["作", 530],
  ["側", 4292],
  ["先", 601],
  ["党", -2006],
  ["共", -1212],
  ["内", 584],
  ["円", 788],
  ["初", 1347],
  ["前", 1623],
  ["副", 3879],
  ["力", -302],
  ["動", -740],
  ["務", -2715],
  ["化", 776],
  ["区", 4517],
  ["協", 1013],
  ["参", 1555],
  ["合", -1834],
  ["和", -681],
  ["員", -910],
  ["器", -851],
  ["回", 1500],
  ["国", -619],
  ["園", -1200],
  ["地", 866],
  ["場", -1410],
  ["塁", -2094],
  ["士", -1413],
  ["多", 1067],
  ["大", 571],
  ["子", -4802],
  ["学", -1397],
  ["定", -1057],
  ["寺", -809],
  ["小", 1910],
  ["屋", -1328],
  ["山", -1500],
  ["島", -2056],
  ["川", -2667],
  ["市", 2771],
  ["年", 374],
  ["庁", -4556],
  ["後", 456],
  ["性", 553],
  ["感", 916],
  ["所", -1566],
  ["支", 856],
  ["改", 787],
  ["政", 2182],
  ["教", 704],
  ["文", 522],
  ["方", -856],
  ["日", 1798],
  ["時", 1829],
  ["最", 845],
  ["月", -9066],
  ["木", -485],
  ["来", -442],
  ["校", -360],
  ["業", -1043],
  ["氏", 5388],
  ["民", -2716],
  ["気", -910],
  ["沢", -939],
  ["済", -543],
  ["物", -735],
  ["率", 672],
  ["球", -1267],
  ["生", -1286],
  ["産", -1101],
  ["田", -2900],
  ["町", 1826],
  ["的", 2586],
  ["目", 922],
  ["省", -3485],
  ["県", 2997],
  ["空", -867],
  ["立", -2112],
  ["第", 788],
  ["米", 2937],
  ["系", 786],
  ["約", 2171],
  ["経", 1146],
  ["統", -1169],
  ["総", 940],
  ["線", -994],
  ["署", 749],
  ["者", 2145],
  ["能", -730],
  ["般", -852],
  ["行", -792],
  ["規", 792],
  ["警", -1184],
  ["議", -244],
  ["谷", -1000],
  ["賞", 730],
  ["車", -1481],
  ["軍", 1158],
  ["輪", -1433],
  ["込", -3370],
  ["近", 929],
  ["道", -1291],
  ["選", 2596],
  ["郎", -4866],
  ["都", 1192],
  ["野", -1100],
  ["銀", -2213],
  ["長", 357],
  ["間", -2344],
  ["院", -2297],
  ["際", -2604],
  ["電", -878],
  ["領", -1659],
  ["題", -792],
  ["館", -1984],
  ["首", 1749],
  ["高", 2120],
]);
/*
 * UW5: integer weights keyed by single characters, plus the key "E2".
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "1", "E2", "「", "イ", "ル" and "ン" a second
 * time at the end with identical weights (possibly width variants collapsed
 * by Unicode normalization — confirm against upstream). A Map keeps one
 * entry per key, so the repeats are omitted here.
 */
var UW5 = new Map([
  [",", 465],
  [".", -299],
  ["1", -514],
  ["E2", -32768],
  ["]", -2762],
  ["、", 465],
  ["。", -299],
  ["「", 363],
  ["あ", 1655],
  ["い", 331],
  ["う", -503],
  ["え", 1199],
  ["お", 527],
  ["か", 647],
  ["が", -421],
  ["き", 1624],
  ["ぎ", 1971],
  ["く", 312],
  ["げ", -983],
  ["さ", -1537],
  ["し", -1371],
  ["す", -852],
  ["だ", -1186],
  ["ち", 1093],
  ["っ", 52],
  ["つ", 921],
  ["て", -18],
  ["で", -850],
  ["と", -127],
  ["ど", 1682],
  ["な", -787],
  ["に", -1224],
  ["の", -635],
  ["は", -578],
  ["べ", 1001],
  ["み", 502],
  ["め", 865],
  ["ゃ", 3350],
  ["ょ", 854],
  ["り", -208],
  ["る", 429],
  ["れ", 504],
  ["わ", 419],
  ["を", -1264],
  ["ん", 327],
  ["イ", 241],
  ["ル", 451],
  ["ン", -343],
  ["中", -871],
  ["京", 722],
  ["会", -1153],
  ["党", -654],
  ["務", 3519],
  ["区", -901],
  ["告", 848],
  ["員", 2104],
  ["大", -1296],
  ["学", -548],
  ["定", 1785],
  ["嵐", -1304],
  ["市", -2991],
  ["席", 921],
  ["年", 1763],
  ["思", 872],
  ["所", -814],
  ["挙", 1618],
  ["新", -1682],
  ["日", 218],
  ["月", -4353],
  ["査", 932],
  ["格", 1356],
  ["機", -1508],
  ["氏", -1347],
  ["田", 240],
  ["町", -3912],
  ["的", -3149],
  ["相", 1319],
  ["省", -1052],
  ["県", -4003],
  ["研", -997],
  ["社", -278],
  ["空", -813],
  ["統", 1955],
  ["者", -2233],
  ["表", 663],
  ["語", -1073],
  ["議", 1219],
  ["選", -1018],
  ["郎", -368],
  ["長", 786],
  ["間", 1191],
  ["題", 2368],
  ["館", -689],
]);
/*
 * UW6: integer weights keyed by single characters, plus the key "E1".
 * NOTE(review): presumably TinySegmenter feature weights; the code that
 * reads this table is outside this chunk — confirm before editing values.
 * NOTE(review): the source listed "1", "E1", "ル" and "ン" a second time at
 * the end with identical weights (possibly width variants collapsed by
 * Unicode normalization — confirm against upstream). A Map keeps one entry
 * per key, so the repeats are omitted here.
 */
var UW6 = new Map([
  [",", 227],
  [".", 808],
  ["1", -270],
  ["E1", 306],
  ["、", 227],
  ["。", 808],
  ["あ", -307],
  ["う", 189],
  ["か", 241],
  ["が", -73],
  ["く", -121],
  ["こ", -200],
  ["じ", 1782],
  ["す", 383],
  ["た", -428],
  ["っ", 573],
  ["て", -1014],
  ["で", 101],
  ["と", -105],
  ["な", -253],
  ["に", -149],
  ["の", -417],
  ["は", -236],
  ["も", -206],
  ["り", 187],
  ["る", -135],
  ["を", 195],
  ["ル", -673],
  ["ン", -496],
  ["一", -277],
  ["中", 201],
  ["件", -800],
  ["会", 624],
  ["前", 302],
  ["区", 1792],
  ["員", -1212],
  ["委", 798],
  ["学", -960],
  ["市", 887],
  ["広", -695],
  ["後", 535],
  ["業", -697],
  ["相", 753],
  ["社", -507],
  ["福", 974],
  ["空", -822],
  ["者", 1811],
  ["連", 463],
  ["郎", 1082],
]);
/**
 * Lookup from a single UTF-16 code unit to its one-letter character class:
 *
 *   'M' - kanji numerals        'H' - kanji (plus 々 〆 ヵ ヶ)
 *   'I' - hiragana              'K' - katakana (full- and half-width)
 *   'A' - Latin letters         'N' - digits
 *
 * Characters that are absent from the map are treated as class 'O' by
 * `tokenize`.
 *
 * The full-width Latin / digit ranges and the half-width katakana range are
 * spelled with \u escapes so they cannot be folded into their ordinary-width
 * counterparts by compatibility normalization (which would leave the map
 * with duplicated loops and no entries for those characters).
 */
var CharMap = (() => {
  const m = new Map();

  // Assign `type` to every code unit from `first` to `final`, inclusive.
  const setRange = (first, final, type) => {
    const end = final.charCodeAt(0);
    for (let code = first.charCodeAt(0); code <= end; code++) {
      m.set(String.fromCharCode(code), type);
    }
  };

  setRange('一', '龠', 'H');
  setRange('ぁ', 'ん', 'I');
  setRange('ァ', 'ヴ', 'K');
  // Half-width katakana. NOTE(review): the start point U+FF67 is inferred
  // from the normalized form of this range; upstream TinySegmenter starts at
  // U+FF71 - confirm which one is intended.
  setRange('\uFF67', '\uFF9D', 'K');
  setRange('a', 'z', 'A');
  setRange('A', 'Z', 'A');
  setRange('\uFF41', '\uFF5A', 'A'); // full-width a-z
  setRange('\uFF21', '\uFF3A', 'A'); // full-width A-Z
  setRange('0', '9', 'N');
  setRange('\uFF10', '\uFF19', 'N'); // full-width 0-9

  // These run after the ranges on purpose: the kanji numerals lie inside the
  // 'H' range above and must end up as 'M'.
  '一二三四五六七八九十百千万億兆'.split('').forEach((c) => {
    m.set(c, 'M');
  });
  '々〆ヵヶ'.split('').forEach((c) => {
    m.set(c, 'H');
  });

  return m;
})();
/**
 * Split `text` into segments.
 *
 * The function walks the input one UTF-16 code unit at a time. For every gap
 * between two adjacent units it sums scores looked up from the module-level
 * tables, using a sliding window of six symbols (w1..w6), their character
 * classes (c1..c6) and the three previous boundary decisions (p1..p3). A
 * positive total closes the current segment at that gap.
 *
 * Window layout while deciding the gap after index `idx`:
 *   w3 = text[idx], w4 = text[idx + 1], w5 = text[idx + 2], w6 = text[idx + 3]
 * Positions before the start are 'B3', 'B2', 'B1'; positions past the end are
 * 'E1', 'E2'. Both kinds of marker have class 'O'.
 *
 * Note: indexing is by UTF-16 code unit, so a surrogate pair is seen as two
 * separate symbols.
 *
 * @param {string|String} text - Text to segment.
 * @returns {string[]} The segments in order; an empty array when `text` is
 *   not a string or is empty.
 */
function tokenize(text) {
  // Accept String wrapper objects too, but work on the primitive value.
  const input = text instanceof String ? text.valueOf() : text;
  if (typeof input !== 'string' || input === '') {
    return [];
  }

  const result = [];
  const last = input.length - 1;
  let idx = 0;
  let start = 0;

  // Previous three boundary decisions: 'U' unknown, 'B' boundary, 'O' none.
  let p1 = 'U';
  let p2 = 'U';
  let p3 = 'U';

  let w1 = 'B3';
  let w2 = 'B2';
  let w3 = 'B1';
  let c1 = 'O';
  let c2 = 'O';
  let c3 = 'O';

  let w4 = input[0];
  let c4 = CharMap.get(w4) || 'O';

  let idx1 = 1;
  let idx2 = 2;
  let idx3;

  // Pre-load the two look-ahead slots. The first loop iteration shifts the
  // window once, so (w4, w5, w6) here become (w3, w4, w5) for the first gap.
  let w5;
  let w6;
  let c5;
  let c6;
  if (last === 0) {
    w5 = 'E1';
    w6 = 'E2';
    c5 = 'O';
    c6 = 'O';
  } else {
    w5 = input[1];
    c5 = CharMap.get(w5) || 'O';
    if (last === 1) {
      // Only the slot after the second character is past the end; the class
      // of the second character itself (c5) must be kept.
      w6 = 'E1';
      c6 = 'O';
    } else {
      w6 = input[2];
      c6 = CharMap.get(w6) || 'O';
    }
  }

  while (idx < last) {
    let score = BIAS;

    // Slide the window one position to the right.
    w1 = w2;
    w2 = w3;
    w3 = w4;
    w4 = w5;
    w5 = w6;
    c1 = c2;
    c2 = c3;
    c3 = c4;
    c4 = c5;
    c5 = c6;

    idx3 = idx + 3;
    if (idx3 <= last) {
      w6 = input[idx3];
      c6 = CharMap.get(w6) || 'O';
    } else if (idx2 === last) {
      w6 = 'E1';
      c6 = 'O';
    } else {
      w6 = 'E2';
      c6 = 'O';
    }

    // Scores for the previous decisions are small enough to be inlined.
    if (p1 === 'O') {
      score += -214;
    }
    if (p2 === 'B') {
      score += 69;
    } else if (p2 === 'O') {
      score += 935;
    }
    if (p3 === 'B') {
      score += 189;
    }

    score += (BP1.get(p1 + p2) || 0);
    score += (BP2.get(p2 + p3) || 0);

    score += (UW1.get(w1) || 0);
    score += (UW2.get(w2) || 0);
    score += (UW3.get(w3) || 0);
    score += (UW4.get(w4) || 0);
    score += (UW5.get(w5) || 0);
    score += (UW6.get(w6) || 0);

    score += (BW1.get(w2 + w3) || 0);
    score += (BW2.get(w3 + w4) || 0);
    score += (BW3.get(w4 + w5) || 0);

    score += (TW1.get(w1 + w2 + w3) || 0);
    score += (TW2.get(w2 + w3 + w4) || 0);
    score += (TW3.get(w3 + w4 + w5) || 0);
    score += (TW4.get(w4 + w5 + w6) || 0);

    score += (UC1.get(c1) || 0);
    score += (UC2.get(c2) || 0);
    // The c3 contribution is inlined instead of using a table.
    if (c3 === 'A') {
      score += -1370;
    } else if (c3 === 'I') {
      score += 2311;
    }
    score += (UC4.get(c4) || 0);
    score += (UC5.get(c5) || 0);
    score += (UC6.get(c6) || 0);

    score += (BC1.get(c2 + c3) || 0);
    score += (BC2.get(c3 + c4) || 0);
    score += (BC3.get(c4 + c5) || 0);

    score += (TC1.get(c1 + c2 + c3) || 0);
    score += (TC2.get(c2 + c3 + c4) || 0);
    score += (TC3.get(c3 + c4 + c5) || 0);
    score += (TC4.get(c4 + c5 + c6) || 0);

    score += (UQ1.get(p1 + c1) || 0);
    score += (UQ2.get(p2 + c2) || 0);
    score += (UQ3.get(p3 + c3) || 0);

    score += (BQ1.get(p2 + c2 + c3) || 0);
    score += (BQ2.get(p2 + c3 + c4) || 0);
    score += (BQ3.get(p3 + c2 + c3) || 0);
    score += (BQ4.get(p3 + c3 + c4) || 0);

    score += (TQ1.get(p2 + c1 + c2 + c3) || 0);
    score += (TQ2.get(p2 + c2 + c3 + c4) || 0);
    score += (TQ3.get(p3 + c1 + c2 + c3) || 0);
    score += (TQ4.get(p3 + c2 + c3 + c4) || 0);

    // A positive score closes the current segment right after input[idx].
    const p = score > 0 ? 'B' : 'O';
    if (p === 'B') {
      result.push(input.substring(start, idx1));
      start = idx1;
    }

    p1 = p2;
    p2 = p3;
    p3 = p;

    idx = idx1;
    idx1 = idx2;
    idx2 = idx3;
  }

  // Whatever remains after the last boundary is the final segment.
  result.push(input.substring(start, idx1));
  return result;
}
| module.exports = tokenize; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment