def infer_jd_category(merchant: str, product: str, original_category: str) -> tuple[str, bool, int]:
    """Infer the unified category for one JD bill row.

    Resolution order (first hit wins):
      1. Merchant mapping: a configured key that is a substring of ``merchant``.
      2. Original-category mapping: ``original_category`` is split on
         whitespace and each token is looked up in the configured table.
      3. Generic inference via ``infer_category(merchant, product, "支出")``.
      4. The configured default category.

    Args:
        merchant: Merchant name from the bill; a falsy value is treated as "".
        product: Product description from the bill.
        original_category: JD's own category text; may hold several
            whitespace-separated categories, or be empty.

    Returns:
        ``(category, is_certain, review_level)`` where ``review_level`` is:
          0 = no review needed (merchant or original-category mapping hit)
          1 = low-priority review (generic inference hit, needs confirmation)
          2 = high-priority review (unknown original category kept as-is,
              or every strategy failed)

        Note that an unknown original category is returned with
        ``is_certain=True`` but ``review_level=2``.
    """
    # 1. Direct merchant-name mapping (substring match on the merchant text).
    merchant_text = merchant or ""  # guard: a None merchant must not raise on `in`
    for merchant_key, category in _jd_config.get("商户映射", {}).items():
        if merchant_key in merchant_text:
            return category, True, 0

    # 2. Map JD's original category; the field may list several categories.
    category_mapping = _jd_config.get("分类映射", {})
    unknown_category = None
    for orig_cat in (original_category.split() if original_category else []):
        if orig_cat not in category_mapping:
            # Remember the first unmapped token but keep scanning: a later
            # token may still have a usable mapping, which should win over
            # flagging the row for high-priority review.
            if unknown_category is None:
                unknown_category = orig_cat
            continue
        mapped = category_mapping[orig_cat]
        if mapped:
            return mapped, True, 0
        # Empty mapping value: deliberately inconclusive, try the next token.

    if unknown_category is not None:
        # No token produced a mapping and at least one is absent from the
        # table: keep the original text and request high-priority review.
        return unknown_category, True, 2

    # 3. Generic inference shared with the other cleaners.
    # NOTE(review): assumed to return (category, is_certain) — confirm against
    # infer_category's definition.
    category, is_certain = infer_category(merchant, product, "支出")
    if is_certain:
        return category, True, 1

    # 4. Nothing matched: fall back to the configured default category.
    return _jd_config.get("默认分类", "其他支出"), False, 2