- 将 Python 代码移至 analyzer/ 目录(含 venv) - 拆分 Go 服务器代码为模块化结构: - config/: 配置加载 - model/: 请求/响应模型 - service/: 业务逻辑 - handler/: API处理器 - 添加 .gitignore 文件 - 删除旧的独立脚本文件
169 lines
5.8 KiB
Python
169 lines
5.8 KiB
Python
"""
|
||
交易分类配置和推断逻辑
|
||
|
||
配置文件: config/category.yaml
|
||
"""
|
||
import yaml
|
||
from pathlib import Path
|
||
|
||
# =============================================================================
|
||
# 加载 YAML 配置
|
||
# =============================================================================
|
||
|
||
# Path of the YAML category configuration: "config/category.yaml" inside the
# directory that contains this module.
CONFIG_FILE = Path(__file__).parent / "config" / "category.yaml"
|
||
|
||
def load_config():
    """Load the category configuration from ``CONFIG_FILE``.

    Returns:
        The parsed YAML document. An empty file, which ``yaml.safe_load``
        parses to ``None``, is returned as an empty dict so that callers
        using ``.get()`` on the result keep working.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    with open(CONFIG_FILE, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # safe_load() yields None for an empty document; normalise to a mapping.
    return {} if data is None else data
|
||
|
||
def reload_config():
    """Re-read the configuration file and replace the cached module config.

    Call this after the configuration file has been modified.
    """
    global _config
    refreshed = load_config()
    _config = refreshed
|
||
|
||
# 初始化加载
|
||
# Module-level cache of the parsed configuration, populated when the module is
# imported; reload_config() replaces it.
_config = load_config()
|
||
|
||
|
||
# =============================================================================
|
||
# 配置访问函数
|
||
# =============================================================================
|
||
|
||
def get_platform_merchants() -> list[str]:
    """Return the configured list of platform-type merchants to re-classify.

    Returns:
        The value of the "平台商家" config entry, or an empty list when the
        entry is missing or present but empty/null in the YAML file.
    """
    # `.get(key, [])` alone would still return None for a key with a null
    # value, so fall back explicitly.
    return _config.get("平台商家") or []
|
||
|
||
|
||
# =============================================================================
|
||
# 分类推断函数
|
||
# =============================================================================
|
||
|
||
def _normalize_text(value) -> str:
    """Return *value* lower-cased as text; ``None`` becomes an empty string."""
    return "" if value is None else str(value).lower()


def _first_matching_category(text: str, categories):
    """Return the first category with a keyword contained in *text*, else None.

    Categories and keywords are tried in mapping/list order, so the order in
    the configuration defines the matching priority.
    """
    for category, keywords in (categories or {}).items():
        for keyword in keywords or ():
            # A null or empty keyword carries no information, and "" would be
            # a substring of every transaction text, so skip it.
            if keyword is None or keyword == "":
                continue
            if str(keyword).lower() in text:
                return category
    return None


def infer_category(merchant: str, product: str, income_expense: str,
                   *, config: "dict | None" = None) -> tuple[str, bool]:
    """Infer a transaction category from the merchant name and product text.

    Matching is a case-insensitive substring search of each configured keyword
    in "<merchant> <product>".

    Args:
        merchant: Counterparty / merchant name. ``None`` is treated as "".
        product: Product description. ``None`` is treated as "".
        income_expense: Direction of the transaction. Exactly "收入" selects
            the income categories; any other value is handled as an expense.
        config: Optional configuration mapping to use instead of the
            module-level one (same structure: "收入分类", "支出分类",
            "默认分类").

    Returns:
        ``(category, is_certain)``. When no keyword matches, the default
        category for the direction is returned with ``is_certain`` False.
        Transfers ("转账") that match no keyword are covered by this fallback.

    Raises:
        KeyError: If a required section is missing from the configuration.
    """
    cfg = _config if config is None else config
    combined = _normalize_text(merchant) + " " + _normalize_text(product)

    is_income = income_expense == "收入"
    section = "收入分类" if is_income else "支出分类"

    matched = _first_matching_category(combined, cfg[section])
    if matched is not None:
        return matched, True

    # Nothing matched: report the default category as uncertain.
    return cfg["默认分类"]["收入" if is_income else "支出"], False
|
||
|
||
|
||
def get_all_categories() -> list:
    """Return every expense category name followed by the default expense category."""
    expense_names = [*_config["支出分类"]]
    expense_names.append(_config["默认分类"]["支出"])
    return expense_names
|
||
|
||
|
||
def get_all_income_categories() -> list:
    """Return the names of all configured income categories."""
    income_section = _config["收入分类"]
    return [*income_section]
|
||
|
||
|
||
# =============================================================================
|
||
# 分类重推断(用于修正原始分类错误)
|
||
# =============================================================================
|
||
|
||
def reclassify_if_needed(original_category: str, merchant: str, product: str,
                         income_expense: str) -> tuple[str, bool, int]:
    """Decide whether a transaction's original category should be replaced.

    Args:
        original_category: Category the transaction came with.
        merchant: Counterparty / merchant name.
        product: Product description.
        income_expense: Direction of the transaction ("收入" or "支出").

    Returns:
        ``(category, was_changed, review_level)`` where ``review_level`` is:

        * 0 - no review needed (inference agrees, or the original is trusted)
        * 1 - low priority (category was adjusted; confirm the adjustment)
        * 2 - high priority (inference failed and the original category is
          itself a default/generic one)
    """
    new_category, is_certain = infer_category(merchant, product, income_expense)

    # Choose the direction exactly as infer_category does: only "收入" is
    # income, anything else is an expense. Otherwise an unrecognised direction
    # would compare the expense default returned by infer_category against the
    # income default and report a bogus adjustment.
    default_key = "收入" if income_expense == "收入" else "支出"
    default_category = _config["默认分类"].get(default_key)

    # Is the original category missing, or a default/catch-all one?
    is_original_default = (
        original_category is None
        or original_category == default_category
        or original_category in ("其他", "其他支出", "其他收入", "")
    )

    # Case 1: a specific category was inferred and differs from the original
    # -> adopt it, low-priority review.
    if new_category != default_category and new_category != original_category:
        return new_category, True, 1

    if not is_certain:
        # Case 3: inference failed and the original is generic as well
        # -> high-priority review.
        if is_original_default:
            return new_category, False, 2
        # Case 2: inference failed but the original is specific -> trust it.
        return original_category, False, 0

    # Case 4: inference is certain and nothing was adjusted -> no review.
    return original_category, False, 0
|
||
|
||
|
||
# =============================================================================
|
||
# 调试工具
|
||
# =============================================================================
|
||
|
||
def test_category(merchant: str, product: str = "", income_expense: str = "支出") -> None:
    """Print the inferred category for one transaction (debugging helper)."""
    category, is_certain = infer_category(merchant, product, income_expense)
    if is_certain:
        review_level = "无"
    else:
        review_level = "高 ⚠️"

    report = (
        ("商户", merchant),
        ("商品", product),
        ("收支", income_expense),
        ("分类", category),
        ("复核", review_level),
    )
    for label, value in report:
        print(f"{label}: {value}")
    # Blank line separating consecutive reports.
    print()
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual smoke run: print the inferred category for a few sample rows.
    print("=== 分类测试 ===\n")

    # Each entry is (merchant, product, income/expense).
    test_cases = [
        ("luckin coffee", "订单付款", "支出"),
        ("美团", "万达影城-美团App", "支出"),
        ("美团", "茶百道-美团App", "支出"),
        ("美团", "美宜佳-美团App", "支出"),
    ]

    for case in test_cases:
        test_category(*case)
|
||
|