Cryolitia · April 15, 2025 09:05
diff --git a/linux-font-analytics.py b/linux-font-analytics.py
 #! /usr/bin/python3

 # Usage:
 # fc-list --format "%{family[0]} %{familylang}\n" > <file_path>
 # python3 process.py <file_path>

 import sys
 import json
 import re

 # Count the leaves of a trie.
 def count_leaves(node):
    """Return the number of leaf nodes at or below ``node``.

    A falsy node (in practice an empty dict) has no children and counts as
    exactly one leaf.
    """
    pending = [node]
    leaves = 0
    while pending:
        current = pending.pop()
        if current:
            # Inner node: queue every child for inspection.
            pending.extend(current.values())
        else:
            leaves += 1
    return leaves

 # Collapse chains of single-child nodes.
 def merge_trie(trie):
    """Fold every entry that has exactly one child into that child, in place.

    Subtrees are handled first, so a whole chain of single-child nodes ends
    up as one entry whose key is the chain's keys joined by single spaces.
    A folded entry is stored under the combined key and its old key is
    removed.  Returns None.
    """
    # Walk a snapshot of the keys because entries are added and removed below.
    for name in list(trie):
        children = trie[name]
        merge_trie(children)
        if len(children) != 1:
            continue
        (only_name, grandchildren), = children.items()
        trie[f"{name} {only_name}"] = grandchildren
        del trie[name]

 # Print the trie, ordering every level by leaf count.
 def print_trie(trie, level=0):
    """Print ``trie`` as a nested bullet list, largest subtrees first.

    Each level is sorted by descending leaf count (ties keep the dict's own
    order) and indented two spaces per ``level``.  Entries covering more
    than one leaf show that count in parentheses after the key.
    """
    indent = "  " * level
    # Compute each entry's leaf count once; it drives both ordering and label.
    leaf_counts = {name: count_leaves(children) for name, children in trie.items()}
    for name in sorted(trie, key=leaf_counts.__getitem__, reverse=True):
        total = leaf_counts[name]
        label = f"- {name} ({total})" if total > 1 else f"- {name}"
        print(indent + label)
        print_trie(trie[name], level + 1)

 # Matches a complete "|"-separated list of lowercase language codes, each
 # with at most one "-" subtag, e.g. "en" or "zh-cn|zh-tw".  The pipe is
 # escaped so it is a literal separator rather than regex alternation, and
 # the trailing \Z makes ``langcode.match`` reject words that merely start
 # with a lowercase letter.
 langcode = re.compile(r"[a-z]+(-[a-z]+)?(\|[a-z]+(-[a-z]+)?)*\Z")

 def main():
    """Summarise an ``fc-list`` dump as a font-name trie plus language counts.

    Expects exactly one command-line argument: the path of a text file whose
    lines are font family names, optionally followed by a last column of
    "|"-separated language codes.  Prints the number of distinct non-blank
    lines, the merged trie of family names, and how many lines mention each
    language code.  Exits with status 1 on a usage error or when the file
    cannot be read.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 process.py <file_path>")
        sys.exit(1)

    file_path = sys.argv[1]

    # Read the file.  Blank lines are dropped: they describe no font and would
    # make ``words[-1]`` below raise IndexError.  Duplicates are removed via a
    # set, and sorting keeps the report identical from run to run (iteration
    # order of a set of strings is not stable across interpreter runs).
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            stripped = (line.strip() for line in file)
            lines = sorted({line for line in stripped if line})
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Build the trie: one level per whitespace-separated word of the name.
    trie = {}
    langs = []
    for line in lines:
        words = line.split()
        if langcode.match(words[-1]):
            # The last column holds the language codes, not part of the name.
            langs.append(set(words[-1].split("|")))
            words = words[:-1]
        node = trie
        for word in words:
            node = node.setdefault(word, {})

    # Collapse single-child chains so a shared prefix reads as one entry.
    merge_trie(trie)

    # Number of distinct entries.
    print(f"Total Fonts: {len(lines)}")

    # Font-name trie.
    print("\nTrie Structure:")
    print_trie(trie)

    # Count how many entries list each language code.
    lang_count = {}
    for lang_set in langs:
        for lang in lang_set:
            lang_count[lang] = lang_count.get(lang, 0) + 1

    # Most frequent first; ties are broken alphabetically so the order is
    # deterministic.
    sorted_lang_count = sorted(lang_count.items(), key=lambda item: (-item[1], item[0]))

    # Number of distinct language codes.
    print(f"\nLanguage Count: {len(sorted_lang_count)}")

    # Per-language totals.
    print("\nLanguages:")
    for lang, count in sorted_lang_count:
        print(f"{lang}:\t\t\t{count}")


 # Run the report only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	#! /usr/bin/python3

	# Usage:
	# fc-list --format "%{family[0]} %{familylang}\n" > <file_path>
	# python3 process.py <file_path>

	import sys
	import json
	import re

	# Count the leaves of a trie.
	def count_leaves(node):
	    """Return the number of leaf nodes at or below ``node``.

	    A falsy node (in practice an empty dict) has no children and counts as
	    exactly one leaf; otherwise the result is the sum over all children.
	    """
	    if not node:  # no children: this node is itself a leaf
	        return 1
	    return sum(count_leaves(child) for child in node.values())

	# Collapse chains of single-child nodes.
	def merge_trie(trie):
	    """Fold every entry that has exactly one child into that child, in place.

	    Subtrees are handled first, so a whole chain of single-child nodes ends
	    up as one entry whose key is the chain's keys joined by single spaces.
	    Returns None.
	    """
	    # Walk a snapshot of the keys because entries are added and removed below.
	    keys = list(trie.keys())
	    for key in keys:
	        subtree = trie[key]
	        merge_trie(subtree)  # children first, so chains collapse bottom-up
	        if len(subtree) == 1:
	            child_key = next(iter(subtree))  # the only child's key
	            trie[f"{key} {child_key}"] = subtree[child_key]  # combined entry
	            del trie[key]  # drop the now-redundant parent entry

	# Print the trie, ordering every level by leaf count.
	def print_trie(trie, level=0):
	    """Print ``trie`` as a nested bullet list, largest subtrees first.

	    Each level is sorted by descending leaf count (ties keep the dict's own
	    order) and indented one space per ``level``.  Entries covering more
	    than one leaf show that count in parentheses after the key.
	    """
	    sorted_items = sorted(trie.items(), key=lambda item: count_leaves(item[1]), reverse=True)
	    for key, subtree in sorted_items:
	        leaf_count = count_leaves(subtree)  # computed once, reused in the label
	        if leaf_count > 1:
	            print(" " * level + f"- {key} ({leaf_count})")
	        else:
	            print(" " * level + f"- {key}")
	        print_trie(subtree, level + 1)

	# Matches a complete "|"-separated list of lowercase language codes, each
	# with at most one "-" subtag, e.g. "en" or "zh-cn|zh-tw".  A raw string
	# keeps the escaped pipe a valid regex escape instead of an invalid string
	# escape, and the trailing \Z makes ``langcode.match`` reject words that
	# merely start with a lowercase letter.
	langcode = re.compile(r"[a-z]+(-[a-z]+)?(\|[a-z]+(-[a-z]+)?)*\Z")

	def main():
	    """Summarise an ``fc-list`` dump as a font-name trie plus language counts.

	    Expects exactly one command-line argument: the path of a text file whose
	    lines are font family names, optionally followed by a last column of
	    "|"-separated language codes.  Prints the number of distinct non-blank
	    lines, the merged trie of family names, and how many lines mention each
	    language code.  Exits with status 1 on a usage error or when the file
	    cannot be read.
	    """
	    if len(sys.argv) != 2:
	        print("Usage: python3 process.py <file_path>")
	        sys.exit(1)

	    file_path = sys.argv[1]

	    # Read the file.  Blank lines are dropped: they describe no font and would
	    # make ``words[-1]`` below raise IndexError.  Duplicates are removed via a
	    # set, and sorting keeps the report identical from run to run (iteration
	    # order of a set of strings is not stable across interpreter runs).
	    try:
	        with open(file_path, 'r', encoding='utf-8') as file:
	            stripped = (line.strip() for line in file)
	            lines = sorted({line for line in stripped if line})
	    except FileNotFoundError:
	        print(f"Error: File '{file_path}' not found.")
	        sys.exit(1)
	    except Exception as e:
	        print(f"Error: {e}")
	        sys.exit(1)

	    # Build the trie: one level per whitespace-separated word of the name.
	    trie = {}
	    langs = []
	    for line in lines:
	        words = line.split()
	        if langcode.match(words[-1]):
	            # The last column holds the language codes.  Split on a bare
	            # pipe: a backslash-pipe separator never occurs in the data, so
	            # it would leave the whole list as a single "language".
	            langs.append(set(words[-1].split("|")))
	            words = words[:-1]
	        node = trie
	        for word in words:
	            node = node.setdefault(word, {})

	    # Collapse single-child chains so a shared prefix reads as one entry.
	    merge_trie(trie)

	    # Number of distinct entries.
	    print(f"Total Fonts: {len(lines)}")

	    # Font-name trie.
	    print("\nTrie Structure:")
	    print_trie(trie)

	    # Count how many entries list each language code.
	    lang_count = {}
	    for lang_set in langs:
	        for lang in lang_set:
	            lang_count[lang] = lang_count.get(lang, 0) + 1

	    # Most frequent first; ties are broken alphabetically so the order is
	    # deterministic.
	    sorted_lang_count = sorted(lang_count.items(), key=lambda item: (-item[1], item[0]))

	    # Number of distinct language codes.
	    print(f"\nLanguage Count: {len(sorted_lang_count)}")

	    # Per-language totals.
	    print("\nLanguages:")
	    for lang, count in sorted_lang_count:
	        print(f"{lang}:\t\t\t{count}")


	# Run the report only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()
No results found