Created
January 2, 2020 14:25
-
-
Save syphoxy/36b2ba8cf33ad9e6cde5347a7bd56773 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "encoding/xml" | |
| "fmt" | |
| "log" | |
| "os" | |
| "sort" | |
| "github.com/ikawaha/kagome/tokenizer" | |
| ) | |
const (
	// rootDir is the Civilization VI install directory inside a Steam library
	// under a Linux home directory. USERNAME is presumably a placeholder that
	// has to be replaced with the local account name before running.
	rootDir = "/home/USERNAME/.local/share/Steam/steamapps/common/Sid Meier's Civilization VI"

	// jaStringFile is the Japanese text asset, located below rootDir.
	jaStringFile = rootDir + "/steamassets/base/assets/text/vanilla_ja_jp.xml"
)
| var enStringFiles = []string{ | |
| rootDir + "/steamassets/base/assets/text/en_us/advisortext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ancientrivalsscenario_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/australia_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/automation_narration_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/aztec_montezuma_configtext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/aztec_montezuma_gameplaytext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/aztec_montezuma_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/balancemaps_configtext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/benchmarktext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/building_huey_teocalli_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/buildings_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/citizennames_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/citynames_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civics_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilizations_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_buildings_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_citystates_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_civics_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_civilizations_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_concepts_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_districts_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_features_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_governments_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_greatpeople_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_improvements_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_leaders_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_promotions_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_religions_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_resources_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_search_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_technologies_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_units_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civilopedia_wonders_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civroyalescenario_frontendtext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civroyalescenario_ingametext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/civroyalescenario_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/coldwarscenario_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/combat_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/credits.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/difficulties_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacydeals_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacymodifiers_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacynotifications_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacypanel_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_common_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_declarefriendship_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_declarewar_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_defeat_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_delegation_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_denounce_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_embassy_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_firstmeet_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_greeting_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_kudosandwarnings_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makealliance_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makedeal_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makedemand_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makepeace_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_openborders_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_warning_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/districts_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/earthmaps_configtext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/espionage_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/expansion1_credits.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/expansion1_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/expansion2_credits.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/expansion2_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/featuresandterrains_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/frontend_playbycloud.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/frontendtext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/gamesummaries_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/gossip_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/grammar_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/greatpeople_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/greatworks_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/improvements_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/indonesia_khmer_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ingame_playbycloud.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ingametext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/leaders_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/macedonia_persia_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/modifiers_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/multiplayerfrontendtext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/my2ktext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/napoleonscenario_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/notifications_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/nubia_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/poland_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/projects_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/promotions_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/quests_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/quotes_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/religion_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/religiouscombatscenario_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/routes_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/technologies_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/types_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ui_civilopedia_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ui_endgame_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ui_gamesummaries_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ui_mods_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ui_options_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/ui_technology_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/unit_abilities_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/unitnames_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/units_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/usersetupwarning_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/vikings_packagetext.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/wars_text.xml", | |
| rootDir + "/steamassets/base/assets/text/en_us/worldbuilder_text.xml", | |
| } | |
// enRowXML maps a single <Row> element of an English text asset: the Tag
// attribute identifies the string and the <Text> child carries its content.
type enRowXML struct {
	XMLName xml.Name `xml:"Row"`
	Tag     string   `xml:"Tag,attr"`
	Text    string   `xml:"Text"`
}
| type enGameDataXML struct { | |
| XMLName xml.Name `xml:"GameData"` | |
| FrontEndText []enRowXML `xml:"FrontEndText>Row"` | |
| BaseGameText []enRowXML `xml:"BaseGameText>Row"` | |
| EnglishText []enRowXML `xml:"EnglishText>Row"` | |
| } | |
// jaReplaceXML maps a single <Replace> element of the Japanese text asset: the
// Tag attribute identifies the string and the <Text> child carries its content.
type jaReplaceXML struct {
	XMLName xml.Name `xml:"Replace"`
	Tag     string   `xml:"Tag,attr"`
	Text    string   `xml:"Text"`
}
| type jaGameDataXML struct { | |
| XMLName xml.Name `xml:"GameData"` | |
| LocalizedText []jaReplaceXML `xml:"LocalizedText>Replace"` | |
| } | |
// translation pairs the English and the Japanese text of one string. Either
// field stays empty until the corresponding language has been filled in.
type translation struct {
	English  string
	Japanese string
}
| func main() { | |
| t := tokenizer.New() | |
| freq := make(map[string]int64) | |
| // load japanese string assets | |
| // also generate frequency map | |
| f, err := os.Open(jaStringFile) | |
| if err != nil { | |
| log.Fatal(err) | |
| } | |
| defer f.Close() | |
| data := jaGameDataXML{} | |
| if err := xml.NewDecoder(f).Decode(&data); err != nil { | |
| log.Fatal(err) | |
| } | |
| translations := make(map[string]translation) | |
| for _, i := range data.LocalizedText { | |
| x := translations[i.Tag] | |
| x.Japanese = i.Text | |
| translations[i.Tag] = x | |
| for _, j := range t.Analyze(i.Text, tokenizer.Search) { | |
| if j.Class != tokenizer.KNOWN { | |
| continue | |
| } | |
| f := j.Features() | |
| if f[0] != "名詞" || f[1] != "一般" { | |
| continue | |
| } | |
| freq[j.Surface] += 1 | |
| } | |
| } | |
| // load english string assets | |
| for _, fp := range enStringFiles { | |
| f, err := os.Open(fp) | |
| if err != nil { | |
| log.Fatal(err) | |
| } | |
| data := enGameDataXML{} | |
| if err := xml.NewDecoder(f).Decode(&data); err != nil { | |
| log.Fatal(err) | |
| } | |
| for _, i := range data.FrontEndText { | |
| x := translations[i.Tag] | |
| x.English = i.Text | |
| translations[i.Tag] = x | |
| } | |
| for _, i := range data.BaseGameText { | |
| x := translations[i.Tag] | |
| x.English = i.Text | |
| translations[i.Tag] = x | |
| } | |
| for _, i := range data.EnglishText { | |
| x := translations[i.Tag] | |
| x.English = i.Text | |
| translations[i.Tag] = x | |
| } | |
| } | |
| words := make([]wordFreq, 0, len(freq)) | |
| for k, v := range freq { | |
| words = append(words, wordFreq{word: k, freq: v}) | |
| } | |
| sort.Slice(words, func(i, j int) bool { return words[i].freq > words[j].freq }) | |
| for x, w := range words { | |
| fmt.Println(x, w) | |
| } | |
| } | |
// wordFreq is one entry of the frequency table: a word (the token's surface
// form) together with the number of times it was counted.
type wordFreq struct {
	word string
	freq int64
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment