from fontTools.ttLib import TTFont
import unicodedata
import sys

def is_chinese(char):
    """Return True if *char* lies in the CJK Unified Ideographs block.

    Only the basic block U+4E00..U+9FFF is checked; the comparison is done
    on the string itself, so anything sorting outside that range is False.
    """
    below_block = char < '\u4e00'
    above_block = char > '\u9fff'
    return not (below_block or above_block)

def is_english(char):
    """Return True if *char* is an unaccented ASCII letter (A-Z or a-z)."""
    if 'A' <= char <= 'Z':
        return True
    return 'a' <= char <= 'z'

def is_digit(char):
    """Return True if *char* is a digit according to ``str.isdigit``.

    Because ``str.isdigit`` is Unicode-aware, digits outside ASCII 0-9
    (for example superscript two) are accepted as well.
    """
    verdict = char.isdigit()
    return verdict

# ASCII punctuation plus common full-width / CJK punctuation marks.
# Kept as a set so the lookup is an exact single-character match; testing
# membership in a plain string would be a substring search, which accepts
# the empty string and multi-character runs such as '()'.
_PUNCTUATION_CHARS = frozenset(
    r'''!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~'''
    r'。？！，、；：“”‘’（）《》【】—……￥·'
)


def is_punctuation(char):
    """Return True if *char* is one of the recognised punctuation marks.

    Args:
        char: The character to test.

    Returns:
        True only when *char* is exactly one character from the ASCII or
        CJK punctuation list; the empty string and longer strings yield
        False.
    """
    return char in _PUNCTUATION_CHARS

def is_pinyin(char):
    """Return True if *char* is a single tone-marked pinyin letter.

    This is a deliberately simple check. A character qualifies when it is
    in Latin Extended-A (U+0100..U+017F), in U+01CD..U+01DC, or in the
    explicit list of tone-marked vowels below (which also covers Latin-1
    letters such as ``á`` and ``ü``).

    Args:
        char: The character to test.

    Returns:
        True for exactly one matching character; False otherwise,
        including for the empty string and multi-character strings.
    """
    # Without this guard the substring test below would accept '' and
    # the range comparisons would accept longer strings by prefix.
    if len(char) != 1:
        return False
    return (
        '\u0100' <= char <= '\u017F'  # Latin Extended-A
        or '\u01CD' <= char <= '\u01DC'  # caron / diaeresis tone letters
        or char in 'āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜü'
    )

# The font file path is the only required command-line argument.
if len(sys.argv) < 2:
    print("用法: python print.py <font_file>")
    sys.exit(1)

font_path = sys.argv[1]
font = TTFont(font_path)
try:
    # getBestCmap() returns None when the font has no Unicode cmap
    # subtable; fall back to an empty mapping so the report still prints
    # instead of crashing on None.
    cmap = font['cmap'].getBestCmap() or {}
finally:
    # The font object is not used past this point, so release it here.
    font.close()

# One bucket per category; the counts below are derived from these lists.
chinese_chars = []
english_chars = []
digit_chars = []
punct_chars = []
pinyin_chars = []
other_chars = []

# Only the codepoints (the mapping's keys) matter; glyph names are unused.
for codepoint in cmap:
    try:
        char = chr(codepoint)
    except ValueError:
        # Skip values that chr() cannot turn into a character.
        continue

    # Order matters: pinyin is tested before plain English letters, and
    # the first matching category wins.
    if is_chinese(char):
        chinese_chars.append(char)
    elif is_pinyin(char):
        pinyin_chars.append(char)
    elif is_english(char):
        english_chars.append(char)
    elif is_digit(char):
        digit_chars.append(char)
    elif is_punctuation(char):
        punct_chars.append(char)
    else:
        # Uncategorised characters are listed together with their codepoint.
        other_chars.append(f'{char}(U+{codepoint:04X})')

chinese_count = len(chinese_chars)
english_count = len(english_chars)
digit_count = len(digit_chars)
punct_count = len(punct_chars)
pinyin_count = len(pinyin_chars)
other_count = len(other_chars)
# Every successfully converted codepoint lands in exactly one bucket.
total_count = (
    chinese_count
    + pinyin_count
    + english_count
    + digit_count
    + punct_count
    + other_count
)

print(f'总字符数: {total_count}')
print(f'中文字符数: {chinese_count}')
print(f'拼音字符数: {pinyin_count}')
print(f'英文字符数: {english_count}')
print(f'数字字符数: {digit_count}')
print(f'标点符号数: {punct_count}')
print(f'其他字符数: {other_count}')
# Guard against division by zero when the font maps no characters.
print(f'中文占比: {chinese_count / total_count:.2%}' if total_count else '中文占比: 0.00%')

print('\n【中文字符】')
print(''.join(chinese_chars))
print('\n【拼音字符】')
print(''.join(pinyin_chars))
print('\n【英文字符】')
print(''.join(english_chars))
print('\n【数字字符】')
print(''.join(digit_chars))
print('\n【标点符号】')
print(''.join(punct_chars))
print('\n【其他字符】')
print(' '.join(other_chars))