"""
Transaction category configuration and inference logic.

Configuration file: config/category.yaml
"""

from pathlib import Path

import yaml

# =============================================================================
# Load the YAML configuration
# =============================================================================

CONFIG_FILE = Path(__file__).parent / "config" / "category.yaml"


def load_config():
    """Read and parse the category configuration from ``CONFIG_FILE``.

    Returns:
        The parsed YAML document. An empty document (which
        ``yaml.safe_load`` parses to ``None``) is returned as an empty dict
        so callers can always use mapping operations on the result.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    with open(CONFIG_FILE, "r", encoding="utf-8") as f:
        return yaml.safe_load(f) or {}


def reload_config():
    """Reload the configuration (call after the config file was modified)."""
    global _config
    _config = load_config()


# Initial load at import time.
_config = load_config()


# =============================================================================
# Configuration accessors
# =============================================================================

def get_platform_merchants() -> list[str]:
    """Return the platform-type merchants that need re-classification.

    A missing or null "平台商家" entry yields an empty list.
    """
    return _config.get("平台商家") or []


# =============================================================================
# Category inference
# =============================================================================

def _normalize(text) -> str:
    """Lower-case *text* for matching; ``None`` becomes an empty string."""
    return "" if text is None else str(text).lower()


def _match_category(rules, haystack: str):
    """Return the first category in *rules* with a keyword inside *haystack*.

    Categories are tried in the order they appear in the configuration, so
    earlier categories take priority. Returns ``None`` when nothing matches.
    """
    for category, keywords in (rules or {}).items():
        # A category declared without keywords parses to None in YAML.
        for kw in keywords or ():
            # YAML turns bare scalars such as 12306 into int, so coerce to
            # str before lower-casing. Null/empty keywords are skipped: an
            # empty string is a substring of everything and would match
            # every transaction.
            needle = _normalize(kw)
            if needle and needle in haystack:
                return category
    return None


def infer_category(merchant: str, product: str, income_expense: str) -> tuple[str, bool]:
    """Infer a transaction category from the merchant name and product text.

    Matching is a case-insensitive substring search of each configured
    keyword in ``"<merchant> <product>"``.

    Args:
        merchant: Counterparty / merchant name.
        product: Product description.
        income_expense: Direction of the transaction. Exactly "收入" selects
            the income rules; any other value is handled as an expense.

    Returns:
        ``(category, is_certain)``. When no keyword matches, the configured
        default category for the direction is returned with
        ``is_certain=False``.
    """
    combined = _normalize(merchant) + " " + _normalize(product)

    if income_expense == "收入":
        rules_key, default_key = "收入分类", "收入"
    else:
        rules_key, default_key = "支出分类", "支出"

    matched = _match_category(_config[rules_key], combined)
    if matched is not None:
        return matched, True

    # No keyword matched. Transfers ("转账") are not special-cased: they get
    # the same uncertain default as any other unmatched transaction.
    return _config["默认分类"][default_key], False


def get_all_categories() -> list[str]:
    """Return every expense category followed by the default expense category.

    The default is appended only if it is not already a configured category,
    so the result contains no duplicate entry for it.
    """
    categories = list(_config["支出分类"] or {})
    default_category = _config["默认分类"]["支出"]
    if default_category not in categories:
        categories.append(default_category)
    return categories


def get_all_income_categories() -> list[str]:
    """Return every configured income category.

    Unlike ``get_all_categories``, the default income category is not
    appended.
    """
    return list(_config["收入分类"] or {})
# =============================================================================
# Re-classification (used to correct wrong original categories)
# =============================================================================

def reclassify_if_needed(original_category: str, merchant: str, product: str, income_expense: str) -> tuple[str, bool, int]:
    """Decide whether a transaction's original category should be replaced.

    Args:
        original_category: Category the transaction came with.
        merchant: Counterparty / merchant name.
        product: Product description.
        income_expense: Direction of the transaction ("收入" or "支出").

    Returns:
        ``(category, was_changed, review_level)`` where review_level is:
            0 = no review needed (category is certain, or the original
                category is trusted)
            1 = low-priority review (category was adjusted; confirm the
                adjustment is correct)
            2 = high-priority review (nothing could be inferred and the
                original category is itself a default/catch-all)
    """
    new_category, is_certain = infer_category(merchant, product, income_expense)

    # Pick the default the same way infer_category does: only exactly "收入"
    # is income, everything else is expense. Using a different rule here
    # would make an uncertain expense default look like a specific inferred
    # category for any direction value other than "收入"/"支出".
    default_key = "收入" if income_expense == "收入" else "支出"
    default_category = _config["默认分类"].get(default_key)

    # Is the original category missing or a default/catch-all one?
    is_original_default = (
        not original_category
        or original_category == default_category
        or original_category in ("其他", "其他支出", "其他收入")
    )

    # Case 1: a specific category was inferred and it differs from the
    # original -> adjust, low-priority review.
    if new_category != default_category and new_category != original_category:
        return new_category, True, 1

    if not is_certain:
        # Case 3: inference failed and the original is a default category
        # too -> high-priority review.
        if is_original_default:
            return new_category, False, 2
        # Case 2: inference failed but the original is a specific category
        # -> trust the original, no review.
        return original_category, False, 0

    # Case 4: category is certain and unchanged -> no review.
    return original_category, False, 0


# =============================================================================
# Debugging helpers
# =============================================================================

def test_category(merchant: str, product: str = "", income_expense: str = "支出") -> None:
    """Print the inferred category and review flag for one transaction."""
    category, is_certain = infer_category(merchant, product, income_expense)
    review_level = "无" if is_certain else "高 ⚠️"
    print(f"商户: {merchant}")
    print(f"商品: {product}")
    print(f"收支: {income_expense}")
    print(f"分类: {category}")
    print(f"复核: {review_level}")
    print()


if __name__ == "__main__":
    print("=== 分类测试 ===\n")

    test_cases = [
        ("luckin coffee", "订单付款", "支出"),
        ("美团", "万达影城-美团App", "支出"),
        ("美团", "茶百道-美团App", "支出"),
        ("美团", "美宜佳-美团App", "支出"),
    ]

    for merchant, product, ie in test_cases:
        test_category(merchant, product, ie)