Files
billai/analyzer/category.py
clz c40a118a3d refactor: 重构项目结构
- 将 Python 代码移至 analyzer/ 目录(含 venv)
- 拆分 Go 服务器代码为模块化结构:
  - config/: 配置加载
  - model/: 请求/响应模型
  - service/: 业务逻辑
  - handler/: API处理器
- 添加 .gitignore 文件
- 删除旧的独立脚本文件
2026-01-07 23:26:32 +08:00

169 lines
5.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
交易分类配置和推断逻辑
配置文件: config/category.yaml
"""
import yaml
from pathlib import Path
# =============================================================================
# 加载 YAML 配置
# =============================================================================
# Location of the category rules: <directory of this module>/config/category.yaml
CONFIG_FILE = Path(__file__).parent.joinpath("config", "category.yaml")
def load_config():
    """Read CONFIG_FILE as UTF-8 and return the parsed YAML document.

    The file is parsed with ``yaml.safe_load``; whatever object that call
    produces is returned unchanged.
    """
    with CONFIG_FILE.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed
def reload_config():
    """Re-read the configuration file and replace the module-level cache.

    Call this after the configuration file has been edited so that later
    lookups use the new contents.
    """
    global _config
    refreshed = load_config()
    _config = refreshed
# Initial load: parse the configuration once when this module is imported.
# reload_config() rebinds this name when the file needs to be re-read.
_config = load_config()
# =============================================================================
# 配置访问函数
# =============================================================================
def get_platform_merchants() -> list[str]:
    """Return the platform-type merchants whose transactions need re-classification.

    Reads the "平台商家" entry of the loaded configuration.

    Returns:
        The configured list, or an empty list when the key is missing or is
        present with an empty value (which YAML loads as ``None``).
    """
    # `.get(key, [])` only covers a missing key; `or []` also covers a key
    # that exists but has no value, so callers always receive a list.
    return _config.get("平台商家") or []
# =============================================================================
# 分类推断函数
# =============================================================================
def infer_category(merchant: str, product: str, income_expense: str) -> tuple[str, bool]:
    """
    Infer a transaction category from the merchant name and product text.

    Every configured keyword is searched, case-insensitively, as a substring
    of "<merchant> <product>". Categories are tried in the order they appear
    in the configuration mapping and the first keyword hit wins.

    Args:
        merchant: Counterparty / merchant name. ``None`` is treated as "".
        product: Product description. ``None`` is treated as "".
        income_expense: Direction of the transaction ("收入" / "支出").
            Exactly "收入" selects the income rules; any other value is
            matched against the expense rules.

    Returns:
        (category, is_certain). ``is_certain`` is False when no keyword
        matched and the configured default category for that direction is
        returned instead.
    """
    merchant_text = "" if merchant is None else str(merchant)
    product_text = "" if product is None else str(product)
    combined = merchant_text.lower() + " " + product_text.lower()

    if income_expense == "收入":
        rules, default_key = _config["收入分类"], "收入"
    else:
        rules, default_key = _config["支出分类"], "支出"

    for category, keywords in rules.items():
        # A category declared with no keywords loads from YAML as None.
        for kw in keywords or ():
            if kw is None:
                continue
            # YAML may load a bare keyword as a number, so normalise via str().
            needle = str(kw).lower()
            # A blank keyword would be a substring of everything; skip it.
            if needle and needle in combined:
                return category, True

    # No keyword matched. Transfers ("转账") used to have a dedicated branch
    # here, but it returned exactly this same default / uncertain result.
    return _config["默认分类"][default_key], False
def get_all_categories() -> list:
    """Return every expense category name, with the default expense category last."""
    names = list(_config["支出分类"].keys())
    names.append(_config["默认分类"]["支出"])
    return names
def get_all_income_categories() -> list:
    """Return every configured income category name, in configuration order."""
    income_rules = _config["收入分类"]
    return [name for name in income_rules.keys()]
# =============================================================================
# 分类重推断(用于修正原始分类错误)
# =============================================================================
def reclassify_if_needed(original_category: str, merchant: str, product: str,
                         income_expense: str) -> tuple[str, bool, int]:
    """
    Decide whether a transaction's original category should be replaced.

    The category is re-inferred with ``infer_category`` and compared with the
    original one.

    Args:
        original_category: Category supplied with the transaction.
        merchant: Counterparty / merchant name.
        product: Product description.
        income_expense: Direction of the transaction ("收入" / "支出").

    Returns:
        (category, was_modified, review_level), where review_level is:
            0 = no review needed (inference agrees, or the original is trusted)
            1 = low-priority review (the category was changed; confirm it)
            2 = high-priority review (nothing could be inferred and the
                original category is itself a generic/default one)
    """
    new_category, is_certain = infer_category(merchant, product, income_expense)

    # Select the default with the same rule infer_category applies: only the
    # exact value "收入" is income, everything else is handled as expense.
    # Otherwise, for any other direction value, the expense default returned
    # by infer_category would be compared against the income default and be
    # mistaken for a specific category.
    default_category = _config["默认分类"].get(
        "收入" if income_expense == "收入" else "支出"
    )

    # Is the original category a default / catch-all one? A missing (None)
    # category carries no information either, so it counts as generic.
    is_original_default = (
        original_category is None
        or original_category == default_category
        or original_category in ("其他", "其他支出", "其他收入", "")
    )

    # Case 1: a specific category was inferred and it differs from the
    # original -> switch to it, low-priority review.
    if new_category != default_category and new_category != original_category:
        return new_category, True, 1

    if not is_certain:
        # Case 3: inference failed and the original is generic too
        # -> high-priority review.
        if is_original_default:
            return new_category, False, 2
        # Case 2: inference failed but the original is specific
        # -> trust the original (Alipay) category, no review.
        return original_category, False, 0

    # Case 4: inference is certain and nothing changed -> no review.
    return original_category, False, 0
# =============================================================================
# 调试工具
# =============================================================================
def test_category(merchant: str, product: str = "", income_expense: str = "支出") -> None:
    """Debug helper: print the inferred category for one sample transaction.

    Prints the merchant, product, direction, inferred category and a review
    marker (empty when the inference is certain), followed by a blank line.
    """
    category, is_certain = infer_category(merchant, product, income_expense)
    if is_certain:
        review_level = ""
    else:
        review_level = "高 ⚠️"
    report = [
        f"商户: {merchant}",
        f"商品: {product}",
        f"收支: {income_expense}",
        f"分类: {category}",
        f"复核: {review_level}",
    ]
    print("\n".join(report))
    print()
if __name__ == "__main__":
    # Manual smoke run: print the inference result for a few sample
    # (merchant, product, direction) triples.
    print("=== 分类测试 ===\n")
    test_cases = (
        ("luckin coffee", "订单付款", "支出"),
        ("美团", "万达影城-美团App", "支出"),
        ("美团", "茶百道-美团App", "支出"),
        ("美团", "美宜佳-美团App", "支出"),
    )
    for case in test_cases:
        test_category(*case)