refactor: 重构项目结构
- 将 Python 代码移至 analyzer/ 目录(含 venv) - 拆分 Go 服务器代码为模块化结构: - config/: 配置加载 - model/: 请求/响应模型 - service/: 业务逻辑 - handler/: API处理器 - 添加 .gitignore 文件 - 删除旧的独立脚本文件
This commit is contained in:
168
analyzer/category.py
Normal file
168
analyzer/category.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
交易分类配置和推断逻辑
|
||||
|
||||
配置文件: config/category.yaml
|
||||
"""
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
# =============================================================================
|
||||
# 加载 YAML 配置
|
||||
# =============================================================================
|
||||
|
||||
# Location of the category rules: <directory of this module>/config/category.yaml
CONFIG_FILE = Path(__file__).parent.joinpath("config", "category.yaml")
|
||||
|
||||
def load_config():
    """Load the category configuration from ``CONFIG_FILE``.

    Returns:
        The parsed YAML document. An empty (or comment-only) file yields an
        empty dict rather than ``None`` so that callers can always treat the
        result as a mapping.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    with open(CONFIG_FILE, "r", encoding="utf-8") as f:
        parsed = yaml.safe_load(f)
    # safe_load() returns None for an empty document; normalise it so that
    # later ``_config.get(...)`` / ``_config[...]`` accesses do not fail with
    # an unrelated AttributeError/TypeError on NoneType.
    return {} if parsed is None else parsed
|
||||
|
||||
def reload_config():
    """Re-read the configuration file and swap it into the module-level cache.

    Intended to be called after the configuration file has been modified.
    """
    global _config
    fresh_config = load_config()
    _config = fresh_config
|
||||
|
||||
# 初始化加载
|
||||
# Module-level cache of the parsed configuration, populated once at import
# time; reload_config() rebinds it.
_config = load_config()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 配置访问函数
|
||||
# =============================================================================
|
||||
|
||||
def get_platform_merchants() -> list[str]:
    """Return the list of platform-type merchants that need re-classification.

    Returns:
        The value stored under the ``"平台商家"`` key of the loaded config, or
        an empty list when the key is missing or present without a value.
    """
    # A key written as ``平台商家:`` with no entries parses as None, which
    # .get()'s default does not cover; fall back to an empty list so the
    # declared return type always holds.
    return _config.get("平台商家") or []
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 分类推断函数
|
||||
# =============================================================================
|
||||
|
||||
def _match_category(rules, text: str):
    """Return the first category in ``rules`` with a keyword contained in ``text``, else None."""
    for category, keywords in (rules or {}).items():
        if isinstance(keywords, str):
            # A single keyword written as a YAML scalar; wrap it so it is not
            # iterated character by character.
            keywords = [keywords]
        # A category declared without any keywords parses as None.
        for kw in keywords or ():
            # Keywords such as a bare 711 are parsed as numbers by YAML.
            needle = str(kw).lower()
            # An empty keyword is a substring of every text; ignore it.
            if needle and needle in text:
                return category
    return None


def infer_category(merchant: str, product: str, income_expense: str) -> tuple[str, bool]:
    """Infer a transaction category from the merchant name and product text.

    Matching is a case-insensitive substring search of each configured
    keyword against ``"<merchant> <product>"``. Categories are tried in the
    order they appear in the configuration, so earlier entries take priority.

    Args:
        merchant: Counterparty / merchant name. ``None`` is treated as empty.
        product: Product description. ``None`` is treated as empty.
        income_expense: Direction of the transaction. ``"收入"`` selects the
            income rules; any other value is handled with the expense rules.

    Returns:
        ``(category, is_certain)``. When no keyword matches, the configured
        default category for that direction is returned and ``is_certain``
        is ``False``.
    """
    merchant_text = "" if merchant is None else str(merchant)
    product_text = "" if product is None else str(product)
    combined = merchant_text.lower() + " " + product_text.lower()

    if income_expense == "收入":
        rules_key, default_key = "收入分类", "收入"
    else:
        rules_key, default_key = "支出分类", "支出"

    matched = _match_category(_config[rules_key], combined)
    if matched is not None:
        return matched, True

    # No keyword matched. Transfers ("转账") need no special branch here: they
    # get the same default category and "uncertain" flag as any other
    # unmatched transaction.
    return _config["默认分类"][default_key], False
|
||||
|
||||
|
||||
def get_all_categories() -> list:
    """Return every configured expense category name, followed by the default expense category."""
    expense_names = [*_config["支出分类"].keys()]
    expense_names.append(_config["默认分类"]["支出"])
    return expense_names
|
||||
|
||||
|
||||
def get_all_income_categories() -> list:
    """Return every configured income category name (the default income category is not appended)."""
    income_rules = _config["收入分类"]
    return [name for name in income_rules.keys()]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 分类重推断(用于修正原始分类错误)
|
||||
# =============================================================================
|
||||
|
||||
def reclassify_if_needed(original_category: str, merchant: str, product: str,
                         income_expense: str) -> tuple[str, bool, int]:
    """Decide whether a transaction's original category should be replaced.

    Args:
        original_category: Category the transaction came with.
        merchant: Counterparty / merchant name.
        product: Product description.
        income_expense: Direction of the transaction. ``"收入"`` means income;
            any other value is handled as an expense, matching
            ``infer_category``.

    Returns:
        ``(category, was_changed, review_level)`` where ``review_level`` is:

        * ``0`` - no review needed (inference is certain and agrees, or the
          original category is specific and therefore trusted);
        * ``1`` - low-priority review (the category was changed by inference
          and the change should be confirmed);
        * ``2`` - high-priority review (inference failed and the original
          category is also a default/generic one).
    """
    new_category, is_certain = infer_category(merchant, product, income_expense)

    # Choose the default with the same rule infer_category() uses (everything
    # except "收入" is an expense). Picking it differently would make the
    # expense default look like a specific category for directions that are
    # neither "收入" nor "支出", and wrongly override the original category.
    default_key = "收入" if income_expense == "收入" else "支出"
    default_category = _config["默认分类"].get(default_key)

    # Is the original category a default / catch-all one (or missing)?
    is_original_default = (
        original_category is None
        or original_category == default_category
        or original_category in ("其他", "其他支出", "其他收入", "")
    )

    if is_certain:
        # Case 1: a specific category was inferred and it differs from the
        # original -> switch to it, flag for low-priority review.
        if new_category != default_category and new_category != original_category:
            return new_category, True, 1
        # Case 4: inference is certain and nothing needs to change.
        return original_category, False, 0

    # Case 3: inference failed and the original is generic too -> the
    # transaction needs a high-priority manual review.
    if is_original_default:
        return new_category, False, 2

    # Case 2: inference failed but the original is specific -> trust it.
    return original_category, False, 0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 调试工具
|
||||
# =============================================================================
|
||||
|
||||
def test_category(merchant: str, product: str = "", income_expense: str = "支出") -> None:
    """Print the inferred category for one transaction (debugging helper).

    Args:
        merchant: Counterparty / merchant name.
        product: Product description.
        income_expense: Direction of the transaction.
    """
    category, is_certain = infer_category(merchant, product, income_expense)

    if is_certain:
        review_level = "无"
    else:
        review_level = "高 ⚠️"

    report = (
        f"商户: {merchant}",
        f"商品: {product}",
        f"收支: {income_expense}",
        f"分类: {category}",
        f"复核: {review_level}",
    )
    for line in report:
        print(line)
    # Blank separator line between consecutive reports.
    print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: print the inferred category for a few sample transactions.
    print("=== 分类测试 ===\n")

    samples = (
        ("luckin coffee", "订单付款", "支出"),
        ("美团", "万达影城-美团App", "支出"),
        ("美团", "茶百道-美团App", "支出"),
        ("美团", "美宜佳-美团App", "支出"),
    )

    for sample in samples:
        test_category(*sample)
|
||||
|
||||
Reference in New Issue
Block a user