from fontTools.ttLib import TTFont
from fontTools import subset
import os
import re

# Character set to keep: (start, end) code point ranges, both ends inclusive,
# that is_chinese() treats as Chinese characters.
# The extension ranges below are disabled; uncomment an entry to enable it.
# Note that subset_font() clips every range below 0x10000, so the
# supplementary-plane ranges (Extension B-F) would add nothing there.
chinese_range = [
    (0x4E00, 0x9FFF),   # CJK Unified Ideographs
    # (0x3400, 0x4DBF),   # CJK Unified Ideographs Extension A
    # (0x20000, 0x2A6DF), # CJK Unified Ideographs Extension B
    # (0x2A700, 0x2B73F), # CJK Unified Ideographs Extension C
    # (0x2B740, 0x2B81F), # CJK Unified Ideographs Extension D
    # (0x2B820, 0x2CEAF), # CJK Unified Ideographs Extension E
    # (0x2CEB0, 0x2EBEF), # CJK Unified Ideographs Extension F
]
def is_chinese(char):
    """Return True if the code point of `char` falls in any chinese_range entry.

    `char` must be a single character, since it is passed to ord().
    Both ends of every range are inclusive.
    """
    codepoint = ord(char)
    for low, high in chinese_range:
        if low <= codepoint <= high:
            return True
    return False

def is_english(char):
    """Return True if `char` is an ASCII letter (A-Z or a-z)."""
    # Upper-case letters first, then fall through to the lower-case check
    if 'A' <= char <= 'Z':
        return True
    return 'a' <= char <= 'z'

def is_digit(char):
    """Return True if `char` is a digit according to str.isdigit().

    Note that str.isdigit() is not limited to ASCII 0-9: it also accepts
    other Unicode digits such as superscript or full-width digits.
    """
    digit_flag = char.isdigit()
    return digit_flag

# Characters commonly used in Chinese grading remarks (extend as needed)
common_comment_chars = "优良中差及格优秀通过未通过"

# Chinese and English punctuation marks; the two adjacent raw string literals
# are concatenated into a single string
punctuations = (
    r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""  # English (ASCII) punctuation
    r"。？！，、；：“”‘’（）《》【】—……￥·"  # Chinese punctuation
)

def should_keep(char):
    """Decide whether a single character belongs to the retained character set.

    A character is kept when it is a Chinese ideograph, an ASCII letter, a
    digit, one of the common remark characters, a punctuation mark, or
    whitespace. The checks run in that order and stop at the first match.
    """
    if is_chinese(char) or is_english(char) or is_digit(char):
        return True
    if char in common_comment_chars or char in punctuations:
        return True
    # Whitespace characters are retained as well
    return char.isspace()

def get_keep_glyphs(font):
    """Collect the names of the glyphs in `font` that should be retained.

    Args:
        font: An object exposing getBestCmap(), e.g. a fontTools TTFont.

    Returns:
        A set of glyph names whose mapped character passes should_keep().
        The set always contains '.notdef', even when the font has no usable
        Unicode cmap.
    """
    # getBestCmap() returns None when the font has no Unicode cmap subtable;
    # fall back to an empty mapping instead of failing on None.items().
    cmap = font.getBestCmap() or {}
    keep_glyphs = {
        glyph_name
        for codepoint, glyph_name in cmap.items()
        if should_keep(chr(codepoint))
    }
    # Always keep the .notdef glyph
    keep_glyphs.add('.notdef')
    return keep_glyphs

def _collect_keep_unicodes():
    """Return the set of Unicode code points the subset font must retain."""
    keep = set()

    # Chinese characters. Every range is clipped to the BMP (below 0x10000)
    # to avoid pulling in too many characters.
    for start, end in chinese_range:
        keep.update(range(start, min(end + 1, 0x10000)))

    # English letters
    keep.update(range(ord('A'), ord('Z') + 1))
    keep.update(range(ord('a'), ord('z') + 1))

    # Pinyin letters carrying tone marks
    pinyin_tone_letters = [
        # a series
        'ā', 'á', 'ǎ', 'à',
        # o series
        'ō', 'ó', 'ǒ', 'ò',
        # e series
        'ē', 'é', 'ě', 'è',
        # i series
        'ī', 'í', 'ǐ', 'ì',
        # u series
        'ū', 'ú', 'ǔ', 'ù',
        # ü series
        'ǖ', 'ǘ', 'ǚ', 'ǜ', 'ü',
        # A series
        'Ā', 'Á', 'Ǎ', 'À',
        # O series
        'Ō', 'Ó', 'Ǒ', 'Ò',
        # E series
        'Ē', 'É', 'Ě', 'È',
        # I series
        'Ī', 'Í', 'Ǐ', 'Ì',
        # U series
        'Ū', 'Ú', 'Ǔ', 'Ù',
        # Ü series
        'Ǖ', 'Ǘ', 'Ǚ', 'Ǜ', 'Ü',
    ]
    keep.update(map(ord, pinyin_tone_letters))

    # Digits
    keep.update(range(ord('0'), ord('9') + 1))

    # Common remark characters, punctuation, and the space character
    keep.update(map(ord, common_comment_chars))
    keep.update(map(ord, punctuations))
    keep.add(ord(' '))

    return keep


def subset_font(input_path, output_path):
    """Write a subset of the font at `input_path` to `output_path`.

    The subset keeps the code points in chinese_range (clipped to the BMP),
    ASCII letters and digits, pinyin letters with tone marks, the characters
    in common_comment_chars and punctuations, and the space character.
    Glyph counts before and after subsetting are printed to stdout.

    Args:
        input_path: Path of the font file to read.
        output_path: Path the subset font is saved to.
    """
    unicodes = sorted(_collect_keep_unicodes())

    font = TTFont(input_path)
    try:
        # Count the glyphs before subsetting so the file does not have to be
        # opened and parsed a second time just for the statistics.
        original_glyphs = len(font.getGlyphOrder())

        # Configure fontTools' subsetter
        subsetter = subset.Subsetter()
        subsetter.options.retain_gids = False
        subsetter.options.desubroutinize = True
        subsetter.options.drop_tables = ['DSIG']  # drop the digital signature table

        # Run the subset on the requested code points
        subsetter.populate(unicodes=unicodes)
        subsetter.subset(font)

        # Save the result
        font.save(output_path)
        subset_glyphs = len(font.getGlyphOrder())
    finally:
        # Release the file handle held by the TTFont reader
        font.close()

    # Print statistics
    print(f"原始字体字形数量: {original_glyphs}")
    print(f"子集字体字形数量: {subset_glyphs}")
    print(f"减少了: {original_glyphs - subset_glyphs} 个字形")

if __name__ == "__main__":
    # Script entry point: subset the default input font into the default
    # output file; both paths are relative to the current working directory.
    source_path, target_path = "font_origion.TTF", "font.TTF"
    subset_font(source_path, target_path)
    print(f"Subset font saved to {target_path}")