Last active
April 15, 2025 09:05
-
-
Save Cryolitia/116be83578eed38e486d7e71c40ab5f1 to your computer and use it in GitHub Desktop.
linux-font-analytics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/python3 | |
| # Usage: | |
| # fc-list --format "%{family[0]} %{familylang}\n" > <file_path> | |
| # python3 process.py <file_path> | |
| import sys | |
| import json | |
| import re | |
def count_leaves(node):
    """Return the number of leaves at or below ``node``.

    A falsy node (an empty dict) is itself a leaf and counts as one, so the
    result is always at least 1.
    """
    leaves = 0
    pending = [node]  # explicit stack instead of recursion
    while pending:
        current = pending.pop()
        if current:
            # Interior node: its leaves are the leaves of its children.
            pending.extend(current.values())
        else:
            leaves += 1
    return leaves
def merge_trie(trie):
    """Collapse single-child chains of ``trie`` in place.

    Whenever a node has exactly one child, the node and that child are
    replaced by one entry whose key is the two keys joined with a space.
    Subtrees are processed first, so a whole chain folds into a single key.
    Returns ``None``; the mapping passed in is mutated.
    """
    # Snapshot the keys: entries are added and removed while looping.
    for name in list(trie):
        branch = trie[name]
        merge_trie(branch)  # fold the subtree before inspecting it
        if len(branch) != 1:
            continue
        only_child, grandchildren = next(iter(branch.items()))
        trie[f"{name} {only_child}"] = grandchildren
        del trie[name]
def print_trie(trie, level=0):
    """Print ``trie`` as an indented bullet list, largest subtrees first.

    Each level is ordered by descending leaf count. An entry with more than
    one leaf is printed with its count in parentheses; other entries are
    printed bare. Children are printed one level deeper.

    Args:
        trie: Nested dict mapping keys to sub-dicts.
        level: Current depth; one space of indentation is printed per level.
    """
    # Count each subtree once and reuse the value for sorting and printing.
    entries = [
        (count_leaves(subtree), key, subtree) for key, subtree in trie.items()
    ]
    # list.sort is stable even with reverse=True, so entries with equal
    # counts keep the dict's insertion order.
    entries.sort(key=lambda entry: entry[0], reverse=True)

    indent = " " * level
    for leaf_count, key, subtree in entries:
        if leaf_count > 1:
            print(f"{indent}- {key} ({leaf_count})")
        else:
            print(f"{indent}- {key}")
        print_trie(subtree, level + 1)
# Pattern for a language column: one or more lowercase codes such as "en" or
# "zh-cn", joined by a literal "|". The pipe must be escaped; unescaped it is
# regex alternation and the pattern would stop after the first code.
# NOTE: the pattern is not end-anchored, so `.match()` accepts any string that
# merely starts with a code; use `.fullmatch()` for strict validation.
langcode = re.compile(r"[a-z]+(-[a-z]+)?(\|[a-z]+(-[a-z]+)?)*")
def main():
    """Summarise a font list file given as the single command-line argument.

    Each non-blank line holds a font family name, optionally followed by a
    language column (codes separated by "|"). The family names are folded
    into a word-level trie that is printed as a tree, followed by a count of
    how many lines mention each language.

    Exits with status 1 (after printing a message) when the argument count
    is wrong or the file cannot be read.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 process.py <file_path>")
        sys.exit(1)

    file_path = sys.argv[1]

    # Read the file: strip each line, deduplicate, and drop blank lines
    # (a blank line has no words, so indexing words[-1] would raise).
    # Sorting makes the output reproducible; a bare set has run-dependent
    # iteration order.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = sorted({line.strip() for line in file} - {""})
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Build the trie and tally languages in a single pass.
    trie = {}
    lang_count = {}
    for line in lines:
        words = line.split()  # split on whitespace
        if langcode.match(words[-1]):
            # The last column is the language list; count each code once
            # per line even if it is repeated.
            for lang in set(words[-1].split("|")):
                lang_count[lang] = lang_count.get(lang, 0) + 1
            words = words[:-1]
        node = trie
        for word in words:
            node = node.setdefault(word, {})

    # Collapse chains of single children into one key.
    merge_trie(trie)

    # Number of unique, non-blank input lines.
    print(f"Total Fonts: {len(lines)}")

    print("\nTrie Structure:")
    print_trie(trie)

    # Most frequent language first; ties are broken alphabetically so the
    # report is deterministic.
    sorted_lang_count = sorted(
        lang_count.items(), key=lambda item: (-item[1], item[0])
    )

    print(f"\nLanguage Count: {len(sorted_lang_count)}")

    print("\nLanguages:")
    for lang, count in sorted_lang_count:
        print(f"{lang}:\t\t\t{count}")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment