Skip to content

Instantly share code, notes, and snippets.

@Cryolitia
Last active April 15, 2025 09:05
Show Gist options
  • Select an option

  • Save Cryolitia/116be83578eed38e486d7e71c40ab5f1 to your computer and use it in GitHub Desktop.

Select an option

Save Cryolitia/116be83578eed38e486d7e71c40ab5f1 to your computer and use it in GitHub Desktop.
linux-font-analytics
#! /usr/bin/python3
# Usage:
# fc-list --format "%{family[0]} %{familylang}\n" > <file_path>
# python3 process.py <file_path>
import json
import re
import sys
from collections import Counter
# Count the leaves of a trie
def count_leaves(node):
    """Return the number of leaf nodes at or below ``node``.

    ``node`` is a nested mapping of word -> sub-mapping.  A node without
    children (an empty mapping) is itself a leaf and counts as 1.
    """
    if node:
        total = 0
        for child in node.values():
            total += count_leaves(child)
        return total
    # No children: this node is a leaf.
    return 1
# Merge nodes that have exactly one child
def merge_trie(trie):
    """Collapse single-child chains of ``trie`` in place.

    Subtrees are processed first, so a chain such as A -> B -> C ends up
    as the single key "A B C".  A key whose subtree holds exactly one
    child is replaced by "<key> <child key>", pointing at that child's
    subtree.  Returns None.
    """
    # Iterate over a snapshot of the keys because the dict is modified
    # inside the loop.
    for word in tuple(trie):
        children = trie[word]
        merge_trie(children)  # handle the subtree recursively first
        if len(children) != 1:
            continue
        (only_word, grandchildren), = children.items()
        del trie[word]
        trie[f"{word} {only_word}"] = grandchildren
# Print the trie (every level is sorted recursively)
def print_trie(trie, level=0):
    """Print ``trie`` as a nested bullet list, largest subtrees first.

    Args:
        trie: nested mapping of key -> subtree.
        level: nesting depth of ``trie``; each level indents by one space.

    Entries on a level are ordered by descending leaf count (ties keep
    their dict order).  The leaf count is shown in parentheses only when
    it is greater than 1.
    """
    # Compute each leaf count once and reuse it for sorting and printing.
    counted = [(key, subtree, count_leaves(subtree)) for key, subtree in trie.items()]
    counted.sort(key=lambda entry: entry[2], reverse=True)
    indent = " " * level
    for key, subtree, leaves in counted:
        if leaves > 1:
            print(indent + f"- {key} ({leaves})")
        else:
            print(indent + f"- {key}")
        print_trie(subtree, level + 1)
# Matches the language column: one or more lowercase language tags such as
# "en" or "zh-cn", joined by a literal "|".  The "|" is escaped so that it
# is not read as regex alternation, and the trailing \Z makes ``match``
# require the whole word to be a language list, so a word that merely
# starts with lowercase letters (e.g. "cmr10") is rejected.
langcode = re.compile(r"[a-z]+(-[a-z]+)*(\|[a-z]+(-[a-z]+)*)*\Z")
def main():
    """Print a family-name trie and language statistics for a font list.

    Reads the UTF-8 file named by the single command-line argument.  Each
    non-blank line is split on whitespace; when the last word matches
    ``langcode`` it is split on "|" and its languages are counted, and the
    remaining words are inserted into a word trie.  Prints the number of
    distinct lines, the merged trie, and how many lines list each language.

    Exits with status 1 when the argument count is wrong or the file
    cannot be read.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 process.py <file_path>")
        sys.exit(1)
    file_path = sys.argv[1]

    # Read the file: drop duplicates and blank lines (a blank line has no
    # last word to inspect), and sort so the output is the same on every
    # run instead of following set iteration order.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = sorted({line.strip() for line in file} - {""})
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    except (OSError, UnicodeError) as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Build the trie and count languages in one pass.
    trie = {}
    lang_count = Counter()
    for line in lines:
        words = line.split()  # split on whitespace
        if langcode.match(words[-1]):
            # The last column is the language list; count each language
            # at most once per line.
            lang_count.update(set(words.pop().split("|")))
        node = trie
        for word in words:
            node = node.setdefault(word, {})

    # Merge nodes that have exactly one child.
    merge_trie(trie)

    # Print the number of fonts.
    print(f"Total Fonts: {len(lines)}")

    # Print the trie.
    print("\nTrie Structure:")
    print_trie(trie)

    # Order languages by descending count; ties are broken by name so the
    # listing is deterministic.
    sorted_lang_count = sorted(lang_count.items(), key=lambda item: (-item[1], item[0]))

    # Print the number of languages, then each language with its count.
    print(f"\nLanguage Count: {len(sorted_lang_count)}")
    print("\nLanguages:")
    for lang, count in sorted_lang_count:
        print(f"{lang}:\t\t\t{count}")
# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment