def infer_category(merchant: str, product: str, income_expense: str) -> tuple[str, bool]:
    """
    Infer a transaction category from the merchant name and product text.

    The merchant and product strings are lower-cased, joined with a space,
    and scanned for the keywords configured under "收入分类" (income) or
    "支出分类" (expense). Categories are tried in configuration order and the
    first keyword hit wins.

    Args:
        merchant: Counterparty / merchant name. ``None`` is treated as "".
        product: Product description. ``None`` is treated as "".
        income_expense: "收入" selects the income rules; any other value
            selects the expense rules.

    Returns:
        ``(category, is_certain)``. When no keyword matches, the configured
        default category for that direction is returned with ``False``.
    """
    combined = f"{(merchant or '').lower()} {(product or '').lower()}"

    if income_expense == "收入":
        rules_key, default_key = "收入分类", "收入"
    else:
        rules_key, default_key = "支出分类", "支出"

    for category, keywords in _config[rules_key].items():
        # A category declared without a keyword list is loaded from YAML as None.
        for kw in keywords or ():
            if kw is None:
                continue
            # Unquoted numeric keywords (e.g. 12306) are loaded from YAML as ints.
            needle = str(kw).lower()
            # An empty keyword would match every transaction; ignore it.
            if needle and needle in combined:
                return category, True

    # The former special case for "转账" returned exactly this value as well,
    # so unmatched transfers are covered by the generic fallback.
    return _config["默认分类"][default_key], False
original_category, False, 0 + + +# ============================================================================= +# 调试工具 +# ============================================================================= + +def test_category(merchant: str, product: str = "", income_expense: str = "支出") -> None: + """测试分类推断结果""" + category, is_certain = infer_category(merchant, product, income_expense) + review_level = "无" if is_certain else "高 ⚠️" + print(f"商户: {merchant}") + print(f"商品: {product}") + print(f"收支: {income_expense}") + print(f"分类: {category}") + print(f"复核: {review_level}") + print() + + +if __name__ == "__main__": + print("=== 分类测试 ===\n") + + test_cases = [ + ("luckin coffee", "订单付款", "支出"), + ("美团", "万达影城-美团App", "支出"), + ("美团", "茶百道-美团App", "支出"), + ("美团", "美宜佳-美团App", "支出"), + ] + + for merchant, product, ie in test_cases: + test_category(merchant, product, ie) + diff --git a/analyzer/clean_bill.py b/analyzer/clean_bill.py new file mode 100644 index 0000000..7e80dea --- /dev/null +++ b/analyzer/clean_bill.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +""" +账单清理统一入口 +自动识别微信或支付宝账单,调用对应的清理逻辑 + +用法: python clean_bill.py <输入文件> [输出文件] [日期筛选选项] +示例: + python clean_bill.py 账单.csv --year 2026 + python clean_bill.py 账单.csv --year 2026 --month 1 + python clean_bill.py 账单.csv --start 2026-01-01 --end 2026-01-15 +""" +import sys +from pathlib import Path + +from cleaners.base import create_arg_parser, compute_date_range +from cleaners import AlipayCleaner, WechatCleaner + + +def detect_bill_type(filepath: str) -> str | None: + """ + 检测账单类型 + + Returns: + 'alipay' | 'wechat' | None + """ + try: + with open(filepath, "r", encoding="utf-8") as f: + for _ in range(20): + line = f.readline() + if not line: + break + + # 支付宝特征:表头包含"交易分类"和"对方账号" + if "交易分类" in line and "对方账号" in line: + return "alipay" + + # 微信特征:表头包含"交易类型"和"金额(元)" + if "交易类型" in line and "金额(元)" in line: + return "wechat" + + # 数据行特征 + if line.startswith("202"): + if "¥" in line: + return "wechat" + if "@" in line: 
+ return "alipay" + + except Exception as e: + print(f"读取文件失败: {e}", file=sys.stderr) + return None + + return None + + +def main(): + parser = create_arg_parser("账单清理统一入口 - 自动识别微信/支付宝账单") + parser.add_argument( + "--type", "-t", + choices=["alipay", "wechat", "auto"], + default="auto", + help="手动指定账单类型(默认自动检测)" + ) + + args = parser.parse_args() + input_file = args.input_file + + # 检查文件是否存在 + if not Path(input_file).exists(): + print(f"❌ 错误:文件不存在 - {input_file}", file=sys.stderr) + sys.exit(1) + + # 检测账单类型 + if args.type == "auto": + bill_type = detect_bill_type(input_file) + if bill_type is None: + print("❌ 无法识别账单类型,请使用 --type 参数手动指定", file=sys.stderr) + print(" 支持: --type alipay (支付宝) 或 --type wechat (微信)", file=sys.stderr) + sys.exit(1) + else: + bill_type = args.type + + # 显示检测结果 + type_names = {"alipay": "支付宝", "wechat": "微信"} + print(f"📋 检测到账单类型: {type_names[bill_type]}") + print() + + # 计算日期范围 + start_date, end_date = compute_date_range(args) + + # 获取输出格式 + output_format = getattr(args, 'format', 'csv') + + # 创建对应的清理器 + if bill_type == "alipay": + cleaner = AlipayCleaner(args.input_file, args.output_file, output_format) + else: + cleaner = WechatCleaner(args.input_file, args.output_file, output_format) + + cleaner.set_date_range(start_date, end_date) + cleaner.clean() + + +if __name__ == "__main__": + main() diff --git a/analyzer/cleaners/__init__.py b/analyzer/cleaners/__init__.py new file mode 100644 index 0000000..2ded5c3 --- /dev/null +++ b/analyzer/cleaners/__init__.py @@ -0,0 +1,9 @@ +""" +账单清理模块 +""" +from .base import BaseCleaner +from .alipay import AlipayCleaner +from .wechat import WechatCleaner + +__all__ = ['BaseCleaner', 'AlipayCleaner', 'WechatCleaner'] + diff --git a/analyzer/cleaners/alipay.py b/analyzer/cleaners/alipay.py new file mode 100644 index 0000000..3a144df --- /dev/null +++ b/analyzer/cleaners/alipay.py @@ -0,0 +1,231 @@ +""" +支付宝账单清理模块 +""" +import csv +from decimal import Decimal + +from .base import ( + BaseCleaner, 
def _aggregate_refunds(self, refund_rows: list) -> dict:
    """
    Sum refund amounts per original order.

    Rows with fewer than 11 columns are ignored. The grouping key is the
    part of the trade order number (column 9) before the first "_"; when
    that is empty, the merchant order number (column 10) is used instead.
    Rows with neither are skipped. Each counted refund is echoed to stdout.

    Args:
        refund_rows: Rows whose category column marks them as refunds.

    Returns:
        Mapping of order key to the total refunded amount (Decimal).
    """
    totals = {}

    for refund in refund_rows:
        if len(refund) < 11:
            continue

        trade_no = refund[9].strip()
        merchant_no = refund[10].strip()
        amount = parse_amount(refund[6])

        # split() returns the whole string when no "_" is present.
        key = trade_no.split("_")[0] or merchant_no
        if not key:
            continue

        totals[key] = totals.get(key, Decimal("0")) + amount
        print(f" 退款记录: {refund[0]} | {refund[2]} | {amount}元")

    return totals
def _is_platform_merchant(self, merchant: str) -> bool:
    """Return True when the merchant name contains any configured platform name."""
    for platform_name in get_platform_merchants():
        if platform_name in merchant:
            return True
    return False
def parse_amount(amount_str: str) -> Decimal:
    """
    Parse a bill amount string into a Decimal.

    Currency signs (half- and full-width yen), spaces, and thousands
    separators (half- and full-width commas) are removed before parsing, so
    "¥1,234.50" yields Decimal("1234.50").

    Args:
        amount_str: Raw amount text from the bill.

    Returns:
        The parsed amount. Decimal("0") is returned when the input is not a
        string, is empty or malformed, or parses to NaN/Infinity (which would
        make later amount comparisons raise).
    """
    # Local import: the module-level import only brings in Decimal/ROUND_HALF_UP.
    from decimal import InvalidOperation

    if not isinstance(amount_str, str):
        return Decimal("0")

    cleaned = amount_str
    for symbol in ("¥", "¥", ",", ",", " "):
        cleaned = cleaned.replace(symbol, "")

    try:
        value = Decimal(cleaned.strip())
    except InvalidOperation:
        return Decimal("0")

    return value if value.is_finite() else Decimal("0")
"""格式化金额为字符串(保留两位小数)""" + return str(amount.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)) + + +def compute_date_range(args) -> tuple[date | None, date | None]: + """ + 根据参数计算最终的日期范围 + 多重指定时取交集(最小范围) + + Returns: + (start_date, end_date) 或 (None, None) 表示不筛选 + """ + start_date = None + end_date = None + + # 1. 根据年份设置范围 + if args.year: + year = int(args.year) + start_date = date(year, 1, 1) + end_date = date(year, 12, 31) + + # 2. 根据月份进一步收窄 + if args.month: + month = int(args.month) + year = int(args.year) if args.year else datetime.now().year + + if not start_date: + start_date = date(year, 1, 1) + end_date = date(year, 12, 31) + + month_start = date(year, month, 1) + if month == 12: + month_end = date(year, 12, 31) + else: + month_end = date(year, month + 1, 1) - timedelta(days=1) + + start_date = max(start_date, month_start) if start_date else month_start + end_date = min(end_date, month_end) if end_date else month_end + + # 3. 根据 start/end 参数进一步收窄 + if args.start: + custom_start = parse_date(args.start) + start_date = max(start_date, custom_start) if start_date else custom_start + + if args.end: + custom_end = parse_date(args.end) + end_date = min(end_date, custom_end) if end_date else custom_end + + return start_date, end_date + + +def is_in_date_range(date_str: str, start_date: date | None, end_date: date | None) -> bool: + """检查日期字符串是否在指定范围内""" + if start_date is None and end_date is None: + return True + + try: + row_date = datetime.strptime(date_str[:10], "%Y-%m-%d").date() + except ValueError: + return False + + if start_date and row_date < start_date: + return False + if end_date and row_date > end_date: + return False + return True + + +def create_arg_parser(description: str) -> argparse.ArgumentParser: + """创建通用的命令行参数解析器""" + parser = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +日期筛选说明: + --year 指定年份(如 2026) + --month 指定月份(1-12) + --start 起始日期(YYYY-MM-DD) + --end 
结束日期(YYYY-MM-DD) + + 多个条件同时指定时,取交集(最小日期范围) + +输出格式: + --format 输出格式:csv(默认)或 json + """ + ) + parser.add_argument("input_file", help="输入的账单CSV文件") + parser.add_argument("output_file", nargs="?", default=None, + help="输出文件(默认为 输入文件名_cleaned.csv/json)") + parser.add_argument("--year", "-y", type=str, default=None, + help="保留的年份(如 2026)") + parser.add_argument("--month", "-m", type=int, choices=range(1, 13), + metavar="1-12", help="保留的月份(1-12)") + parser.add_argument("--start", "-s", type=str, help="起始日期(YYYY-MM-DD)") + parser.add_argument("--end", "-e", type=str, help="结束日期(YYYY-MM-DD)") + parser.add_argument("--format", "-f", choices=["csv", "json"], default="csv", + help="输出格式:csv(默认)或 json") + return parser + + +def get_output_file(input_file: str, output_file: str | None, output_format: str = "csv") -> str: + """获取输出文件路径""" + if output_file: + return output_file + import os + base_name = os.path.splitext(input_file)[0] + ext = "json" if output_format == "json" else "csv" + return f"{base_name}_cleaned.{ext}" + + +# ============================================================================= +# 账单清理基类 +# ============================================================================= + +class BaseCleaner(ABC): + """账单清理基类""" + + def __init__(self, input_file: str, output_file: str | None = None, output_format: str = "csv"): + self.input_file = input_file + self.output_format = output_format + self.output_file = get_output_file(input_file, output_file, output_format) + self.start_date: date | None = None + self.end_date: date | None = None + + # 统计信息 + self.stats = { + "original_count": 0, + "filtered_count": 0, + "fully_refunded": 0, + "partially_refunded": 0, + "category_adjusted": 0, + "final_count": 0, + } + + def set_date_range(self, start_date: date | None, end_date: date | None): + """设置日期筛选范围""" + self.start_date = start_date + self.end_date = end_date + + def print_header(self): + """打印处理头信息""" + print(f"输入文件: {self.input_file}") + print(f"输出文件: 
{self.output_file}") + print(f"输出格式: {self.output_format.upper()}") + if self.start_date or self.end_date: + print(f"日期范围: {self.start_date or '不限'} ~ {self.end_date or '不限'}") + else: + print("日期范围: 全部") + print() + + def write_output(self, header: list, rows: list): + """ + 写入输出文件(支持 CSV 和 JSON 格式) + + Args: + header: 表头列表 + rows: 数据行列表 + """ + if self.output_format == "json": + self._write_json(header, rows) + else: + self._write_csv(header, rows) + + def _write_csv(self, header: list, rows: list): + """写入 CSV 格式""" + with open(self.output_file, "w", encoding="utf-8", newline="") as f: + writer = csv.writer(f) + writer.writerow(header) + writer.writerows(rows) + + def _write_json(self, header: list, rows: list): + """写入 JSON 格式""" + # 将每行转换为字典 + data = [] + for row in rows: + record = {} + for i, col in enumerate(header): + if i < len(row): + record[col] = row[i] + else: + record[col] = "" + data.append(record) + + with open(self.output_file, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + @abstractmethod + def clean(self) -> None: + """执行清理,子类实现""" + pass + + @abstractmethod + def reclassify(self, rows: list) -> list: + """ + 重新分类(子类实现) + + Args: + rows: 待处理的数据行 + + Returns: + 处理后的数据行 + """ + pass + diff --git a/analyzer/cleaners/wechat.py b/analyzer/cleaners/wechat.py new file mode 100644 index 0000000..87bf1f2 --- /dev/null +++ b/analyzer/cleaners/wechat.py @@ -0,0 +1,288 @@ +""" +微信账单清理模块 +""" +import csv +import re +from decimal import Decimal + +from .base import ( + BaseCleaner, parse_amount, format_amount, + is_in_date_range, create_arg_parser +) +from category import infer_category + + +# 与支付宝对齐的表头(包含"复核等级"字段) +ALIGNED_HEADER = [ + "交易时间", "交易分类", "交易对方", "对方账号", "商品说明", + "收/支", "金额", "收/付款方式", "交易状态", "交易订单号", "商家订单号", "备注", "复核等级" +] + + +class WechatCleaner(BaseCleaner): + """微信账单清理器""" + + def clean(self) -> None: + """执行清理""" + self.print_header() + + # 读取数据 + with open(self.input_file, "r", encoding="utf-8") as 
def _separate_rows(self, rows: list) -> tuple[list, list, list]:
    """
    Split WeChat rows into refund, expense and income groups.

    A row is a refund when its transaction type (column 1) contains "-退款";
    otherwise the income/expense flag (column 4) decides. Rows that are
    neither "支出" nor "收入" are dropped.

    Rows with fewer than 9 columns are skipped: later stages read the status
    (column 7) and the order number (column 8), so a shorter row would raise
    IndexError there.

    Args:
        rows: Date-filtered rows from the WeChat export.

    Returns:
        ``(refund_rows, expense_rows, income_rows)``.
    """
    min_columns = 9  # highest index read downstream is row[8]

    refund_rows = []
    expense_rows = []
    income_rows = []

    for row in rows:
        if len(row) < min_columns:
            continue

        transaction_type = row[1]
        income_expense = row[4]

        if "-退款" in transaction_type:
            refund_rows.append(row)
        elif income_expense == "支出":
            expense_rows.append(row)
        elif income_expense == "收入":
            # Refund rows were already captured by the first branch.
            income_rows.append(row)

    return refund_rows, expense_rows, income_rows
| {row[5]}") + else: + remaining = original_amount - total_refund + new_row = row.copy() + new_row[5] = f"¥{format_amount(remaining)}" + remark = f"原金额{row[5]},退款¥{total_refund}" + + final_expense_rows.append((new_row, remark)) + self.stats["partially_refunded"] += 1 + print(f" 部分退款: {row[0]} | {row[2]} | 原{row[5]} -> ¥{format_amount(remaining)}") + + if merchant in transfer_refunds: + del transfer_refunds[merchant] + else: + final_expense_rows.append((row, None)) + + return final_expense_rows, income_rows + + def _extract_refund_amount(self, status: str) -> Decimal | None: + """从状态中提取已退款金额""" + match = re.search(r'已退款[((]?¥?([\d.]+)[))]?', status) + if match: + return Decimal(match.group(1)) + if "已全额退款" in status: + return None + return Decimal("0") + + def _convert_and_reclassify(self, row_tuple: tuple, remark_override: str | None) -> list: + """ + 转换为对齐格式并重新分类 + + 微信原始字段: + 0: 交易时间, 1: 交易类型, 2: 交易对方, 3: 商品, + 4: 收/支, 5: 金额(元), 6: 支付方式, 7: 当前状态, + 8: 交易单号, 9: 商户单号, 10: 备注 + + 对齐后字段: + 交易时间, 交易分类, 交易对方, 对方账号, 商品说明, + 收/支, 金额, 收/付款方式, 交易状态, 交易订单号, 商家订单号, 备注, 需复核 + """ + if isinstance(row_tuple, tuple): + row, remark = row_tuple + else: + row, remark = row_tuple, None + + remark = remark_override if remark_override else remark + + transaction_time = row[0] + merchant = row[2] + product = row[3] + income_expense = row[4] + amount = parse_amount(row[5]) + payment_method = row[6] + status = row[7] + order_no = row[8] + merchant_order_no = row[9] if len(row) > 9 else "" + final_remark = remark if remark else (row[10] if len(row) > 10 else "/") + + # 重新分类(微信原始的"交易类型"太笼统) + category, is_certain = infer_category(merchant, product, income_expense) + + # 复核等级: 空=无需复核, HIGH=无法判断 + review_mark = "" if is_certain else "HIGH" + + return [ + transaction_time, + category, + merchant, + "/", # 对方账号(微信无此字段) + product, + income_expense, + format_amount(amount), + payment_method, + status, + order_no, + merchant_order_no, + final_remark, + review_mark + ] + + def reclassify(self, 
def main():
    """
    Command-line entry point for cleaning a WeChat bill.

    Parses the shared options, builds a WechatCleaner with the requested
    output format, applies the date filter, and runs the cleaning pipeline.
    """
    parser = create_arg_parser("清理微信支付账单数据")
    args = parser.parse_args()

    from .base import compute_date_range

    # Forward --format: the parser accepts it, and omitting it here made
    # "--format json" silently produce CSV.
    cleaner = WechatCleaner(args.input_file, args.output_file, args.format)
    start_date, end_date = compute_date_range(args)
    cleaner.set_date_range(start_date, end_date)
    cleaner.clean()
班车 + - 通勤 + - 公交 + - 地铁 + - 火车 + - 机票 + - 航空 + - 共享 + - 京庐 # 京庐出行 + - 哈啰 + - 美团单车 + - 青桔 + - 摩拜 + - 加油 + - 停车 + - 高速 + - 过路费 + - "12306" + - 携程 + - 飞猪 + + # 充值缴费 + 充值缴费: + - 充值 + - 缴费 + - 水费 + - 电费 + - 燃气 + - 话费 + - 流量 + - 宽带 + - 物业 + - 房租 + - 供暖 + - 暖气 + + # 运动健身 + 运动健身: + - 健身 + - 健身房 + - 运动 + - 滑雪 + - 冰雪 + - 游泳 + - 瑜伽 + - 跑步 + - 球场 + - 篮球 + - 足球 + - 羽毛球 + - 乒乓 + - 网球 + - 众雪 # 众雪滑雪用品店 + - 雪具 + - 滑板 + - 攀岩 + - 骑行装备 + + # 文化休闲(优先级高于日用百货,避免万达影城被识别为万达商场) + 文化休闲: + - 影城 # 电影院 + - 影院 + - 电影 + - 游戏 + - 娱乐 + - 旅游 + - 景区 + - 门票 + - 会员 + - 视频 + - 音乐 + - KTV + - 网吧 + - 桌游 + - 剧本杀 + - 密室 + - 酒吧 + - 演出 + - 演唱会 + - 酒店 # 美团酒店 + - 如家 + - 汉庭 + - 7天 + - 民宿 + + # 日用百货 - 盒马放在前面,避免被餐饮匹配 + 日用百货: + - 京邦达 # 京东物流 + - 快递 + - 拼多多 + - 便利店 + - 超市 + - 商场 + - 购物 + - 永辉 + - 华润 + - 万达 + - 苏宁 + - 国美 + - 得物 + - 唯品会 + - 可多 # Codow可多便利店 + - 全家 + - "7-11" + - 罗森 + - 美宜佳 + - 妍丽 # AFIONA妍丽美妆店 + - 屈臣氏 + - 丝芙兰 + + # 餐饮美食 + 餐饮美食: + - coffee + - 咖啡 + - luckin + - 瑞幸 + - 星巴克 + - starbucks + - 食堂 + - 订餐 + - 餐厅 + - 饭店 + - 饮品 + - 美食 + - 烧烤 + - 火锅 + - 面馆 + - 粥 + - 小吃 + - 甜品 + - 蛋糕 + - 麦当劳 + - 肯德基 + - 必胜客 + - 汉堡王 + - 德克士 + - 奶茶 + - 茶饮 + - 喜茶 + - 奈雪 + - 蜜雪 + - 茶百道 + - 书亦 + - 外卖 + - 饿了么 + - 美团外卖 + - 半秋山 # 西餐厅 + - 西餐 + - 中餐 + - 日料 + - 韩餐 + - 川菜 + - 湘菜 + - 酸汤 + - 黔有财 # 贵州酸汤火锅 + - 烤鸭 + - 臭豆腐 + - 炸鸡 + - 披萨 + - 寿司 + - 拉面 + - 米线 + - 螺蛳粉 + - 串串 + - 冒菜 + - 麻辣烫 + - 黄焖鸡 + - 煲仔饭 + - 蛙来哒 # 牛蛙餐厅 + - 粒上皇 # 炒货零食店 + + # 转账红包 + 转账红包: + - 红包 + - 转账给 + + +收入分类: + 退款: + - 退款 + 其他收入: [] + + +默认分类: + 支出: 其他支出 + 收入: 其他收入 + diff --git a/analyzer/requirements.txt b/analyzer/requirements.txt new file mode 100644 index 0000000..a08df0d --- /dev/null +++ b/analyzer/requirements.txt @@ -0,0 +1,2 @@ +pyyaml>=6.0 + diff --git a/clean_alipay_data.py b/clean_alipay_data.py deleted file mode 100644 index 33ceaa8..0000000 --- a/clean_alipay_data.py +++ /dev/null @@ -1,190 +0,0 @@ -""" -清理支付宝交易明细数据 -1. 仅保留指定年份的数据(默认2026年) -2. 
对于退款的条目,找到对应的支出: - - 如果退款金额=支出金额,两条都删除 - - 如果退款金额<支出金额,保留差额并备注 - -用法: python clean_alipay_data.py <输入文件> [输出文件] [--year 年份] -示例: python clean_alipay_data.py 支付宝交易明细.csv output.csv --year 2026 -""" -import csv -import argparse -from decimal import Decimal, ROUND_HALF_UP - - -def parse_args(): - parser = argparse.ArgumentParser(description="清理支付宝交易明细数据") - parser.add_argument("input_file", help="输入的支付宝账单CSV文件") - parser.add_argument("output_file", nargs="?", default=None, help="输出文件(默认为 输入文件名_cleaned.csv)") - parser.add_argument("--year", type=str, default="2026", help="保留的年份(默认2026)") - return parser.parse_args() - - -def parse_amount(amount_str): - """解析金额字符串为Decimal""" - try: - return Decimal(amount_str.strip()) - except: - return Decimal("0") - - -def find_matching_expense(refund_row, expense_rows): - """ - 找到退款对应的支出记录 - 返回 (索引, 支出记录) 或 (None, None) - """ - if len(refund_row) < 11: - return None, None - - refund_order_no = refund_row[9].strip() # 交易订单号 - refund_merchant_no = refund_row[10].strip() # 商家订单号 - - # 退款的交易订单号通常包含原订单号(用_分隔) - original_order = refund_order_no.split("_")[0] if "_" in refund_order_no else refund_order_no - - for i, expense_row in enumerate(expense_rows): - if len(expense_row) >= 11: - expense_order_no = expense_row[9].strip() - expense_merchant_no = expense_row[10].strip() - - # 匹配条件:订单号相同 或 商家订单号相同 - if (original_order and expense_order_no == original_order) or \ - (refund_merchant_no and expense_merchant_no == refund_merchant_no): - return i, expense_row - - return None, None - - -def main(): - args = parse_args() - - input_file = args.input_file - output_file = args.output_file - year = args.year - - # 如果未指定输出文件,自动生成 - if output_file is None: - import os - base_name = os.path.splitext(input_file)[0] - output_file = f"{base_name}_cleaned.csv" - - print(f"输入文件: {input_file}") - print(f"输出文件: {output_file}") - print(f"保留年份: {year}") - print() - - # 读取所有数据 - with open(input_file, "r", encoding="utf-8") as f: - reader = csv.reader(f) - 
header = next(reader) - rows = list(reader) - - print(f"原始数据行数: {len(rows)}") - - # 第一步:筛选指定年份的数据 - rows_year = [] - for row in rows: - if row and row[0].startswith(year): - rows_year.append(row) - - print(f"{year}年数据行数: {len(rows_year)}") - - # 第二步:分离退款和非退款条目 - refund_rows = [] # 退款条目 - expense_rows = [] # 非退款条目 - - for row in rows_year: - if len(row) > 1 and row[1] == "退款": - refund_rows.append(row) - else: - expense_rows.append(row) - - print(f"退款条目数: {len(refund_rows)}") - print(f"非退款条目数: {len(expense_rows)}") - - # 第三步:处理退款,按订单号聚合退款金额 - # 一个支出可能有多笔退款 - order_refunds = {} # 订单号 -> 退款总额 - - for refund_row in refund_rows: - if len(refund_row) >= 11: - refund_order_no = refund_row[9].strip() - refund_merchant_no = refund_row[10].strip() - refund_amount = parse_amount(refund_row[6]) - - original_order = refund_order_no.split("_")[0] if "_" in refund_order_no else refund_order_no - - # 使用原订单号作为key - key = original_order if original_order else refund_merchant_no - if key: - if key not in order_refunds: - order_refunds[key] = Decimal("0") - order_refunds[key] += refund_amount - print(f" 退款记录: {refund_row[0]} | {refund_row[2]} | {refund_amount}元") - - print(f"有退款的订单数: {len(order_refunds)}") - - # 第四步:处理每笔支出 - final_rows = [] - fully_refunded = 0 - partially_refunded = 0 - - for row in expense_rows: - if len(row) >= 12: - order_no = row[9].strip() - merchant_no = row[10].strip() - expense_amount = parse_amount(row[6]) - - # 查找对应的退款 - refund_amount = Decimal("0") - matched_key = None - - for key, amount in order_refunds.items(): - if key and (order_no == key or merchant_no == key or order_no.startswith(key)): - refund_amount = amount - matched_key = key - break - - if matched_key: - if refund_amount >= expense_amount: - # 全额退款,删除该条目 - fully_refunded += 1 - print(f" 全额退款删除: {row[0]} | {row[2]} | {row[4][:25]}... 
| 原{expense_amount}元") - else: - # 部分退款,保留差额并备注 - remaining = expense_amount - refund_amount - remaining_str = str(remaining.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)) - - new_row = row.copy() - new_row[6] = remaining_str - # 在备注列添加说明 - original_remark = new_row[11] if len(new_row) > 11 else "" - new_row[11] = f"原金额{expense_amount}元,退款{refund_amount}元{';' + original_remark if original_remark else ''}" - - final_rows.append(new_row) - partially_refunded += 1 - print(f" 部分退款: {row[0]} | {row[2]} | 原{expense_amount}元 -> {remaining_str}元") - else: - # 无退款,保留原记录 - final_rows.append(row) - else: - final_rows.append(row) - - print(f"\n处理结果:") - print(f" 全额退款删除: {fully_refunded} 条") - print(f" 部分退款调整: {partially_refunded} 条") - print(f" 最终保留行数: {len(final_rows)}") - - # 写入清理后的数据 - with open(output_file, "w", encoding="utf-8", newline="") as f: - writer = csv.writer(f) - writer.writerow(header) - writer.writerows(final_rows) - - print(f"\n清理后的数据已保存到: {output_file}") - - -if __name__ == "__main__": - main() - diff --git a/clean_wechat_data.py b/clean_wechat_data.py deleted file mode 100644 index 7f79736..0000000 --- a/clean_wechat_data.py +++ /dev/null @@ -1,308 +0,0 @@ -""" -清理微信支付账单数据 -1. 仅保留指定年份的数据(默认2026年) -2. 对于退款的条目,找到对应的支出: - - 如果全额退款,两条都删除 - - 如果部分退款,保留差额并备注 -3. 字段格式与支付宝对齐 -4. 
根据商户名称自动推断交易分类 - -用法: python clean_wechat_data.py <输入文件> [输出文件] [--year 年份] -示例: python clean_wechat_data.py 微信账单.csv output.csv --year 2026 -""" -import csv -import re -import argparse -from decimal import Decimal, ROUND_HALF_UP - - -def parse_args(): - parser = argparse.ArgumentParser(description="清理微信支付账单数据") - parser.add_argument("input_file", help="输入的微信账单CSV文件") - parser.add_argument("output_file", nargs="?", default=None, help="输出文件(默认为 输入文件名_cleaned.csv)") - parser.add_argument("--year", type=str, default="2026", help="保留的年份(默认2026)") - return parser.parse_args() - -# 与支付宝对齐的表头 -ALIGNED_HEADER = ["交易时间", "交易分类", "交易对方", "对方账号", "商品说明", "收/支", "金额", "收/付款方式", "交易状态", "交易订单号", "商家订单号", "备注"] - - -def parse_amount(amount_str): - """解析金额字符串为Decimal(去掉¥符号)""" - try: - # 去掉¥符号和空格 - clean = amount_str.replace("¥", "").replace(" ", "").strip() - return Decimal(clean) - except: - return Decimal("0") - - -def format_amount(amount): - """格式化金额为字符串(不带¥符号,与支付宝一致)""" - return str(amount.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)) - - -def extract_refund_amount(status): - """从状态中提取已退款金额""" - # 匹配 "已退款(¥1.00)" 或 "已退款¥1.00" 格式 - match = re.search(r'已退款[((]?¥?([\d.]+)[))]?', status) - if match: - return Decimal(match.group(1)) - if "已全额退款" in status: - return None # 表示全额退款,需要从支出金额获取 - return Decimal("0") - - -def infer_category(merchant, product, income_expense): - """根据商户名称和商品信息推断交易分类""" - merchant_lower = merchant.lower() - product_lower = product.lower() - combined = merchant_lower + " " + product_lower - - # 收入类 - if income_expense == "收入": - if "退款" in combined: - return "退款" - return "其他收入" - - # 餐饮美食 - food_keywords = ["coffee", "咖啡", "luckin", "瑞幸", "星巴克", "starbucks", - "食堂", "订餐", "餐", "饮", "茶", "奶茶", "饮品", "美食", - "烧烤", "火锅", "面", "饭", "粥", "小吃", "甜品", "蛋糕", - "盒马", "鲜生", "超市", "麦当劳", "肯德基", "必胜客"] - - # 交通出行 - transport_keywords = ["出行", "打车", "单车", "骑行", "骑安", "滴滴", "高德", - "班车", "通勤", "公交", "地铁", "火车", "机票", "航空", - "共享", "京庐", "哈啰", "美团单车", "青桔"] - - # 
日用百货 - daily_keywords = ["沃尔玛", "walmart", "京东", "京邦达", "快递", "淘宝", - "天猫", "拼多多", "便利店", "超市", "商场", "购物"] - - # 医疗健康 - health_keywords = ["医院", "药", "诊所", "健康", "皮肤", "医疗", "体检"] - - # 文化休闲 - leisure_keywords = ["电影", "游戏", "娱乐", "健身", "运动", "滑雪", "冰雪", - "旅游", "景区", "门票", "会员", "视频", "音乐"] - - # 充值缴费 - recharge_keywords = ["充值", "缴费", "水费", "电费", "燃气", "话费", "流量"] - - # 按优先级匹配 - for kw in health_keywords: - if kw in combined: - return "医疗健康" - - for kw in transport_keywords: - if kw in combined: - return "交通出行" - - for kw in recharge_keywords: - if kw in combined: - return "充值缴费" - - for kw in leisure_keywords: - if kw in combined: - return "文化休闲" - - # 盒马特殊处理:如果是盒马但不是餐饮相关,归为日用百货 - if "盒马" in combined or "鲜生" in combined: - return "日用百货" - - for kw in food_keywords: - if kw in combined: - return "餐饮美食" - - for kw in daily_keywords: - if kw in combined: - return "日用百货" - - # 转账类 - if "转账" in combined: - return "其他支出" - - # 默认 - return "其他支出" - - -def convert_row_to_aligned_format(row, remark_override=None): - """ - 将微信原始行转换为与支付宝对齐的格式 - 微信原始: 交易时间,交易类型,交易对方,商品,收/支,金额(元),支付方式,当前状态,交易单号,商户单号,备注 - 对齐后: 交易时间,交易分类,交易对方,对方账号,商品说明,收/支,金额,收/付款方式,交易状态,交易订单号,商家订单号,备注 - """ - transaction_time = row[0] # 交易时间 - merchant = row[2] # 交易对方 - product = row[3] # 商品 -> 商品说明 - income_expense = row[4] # 收/支 - amount = parse_amount(row[5]) # 金额(元) -> 金额(去掉¥) - payment_method = row[6] # 支付方式 -> 收/付款方式 - status = row[7] # 当前状态 -> 交易状态 - order_no = row[8] # 交易单号 -> 交易订单号 - merchant_order_no = row[9] if len(row) > 9 else "" # 商户单号 -> 商家订单号 - remark = remark_override if remark_override else (row[10] if len(row) > 10 else "/") # 备注 - - # 推断交易分类 - category = infer_category(merchant, product, income_expense) - - # 对方账号(微信没有这个字段,用/填充) - account = "/" - - return [ - transaction_time, - category, - merchant, - account, - product, - income_expense, - format_amount(amount), - payment_method, - status, - order_no, - merchant_order_no, - remark - ] - - -def main(): - args = parse_args() - - 
input_file = args.input_file - output_file = args.output_file - year = args.year - - # 如果未指定输出文件,自动生成 - if output_file is None: - import os - base_name = os.path.splitext(input_file)[0] - output_file = f"{base_name}_cleaned.csv" - - print(f"输入文件: {input_file}") - print(f"输出文件: {output_file}") - print(f"保留年份: {year}") - print() - - # 读取所有数据 - with open(input_file, "r", encoding="utf-8") as f: - reader = csv.reader(f) - header = next(reader) - rows = list(reader) - - print(f"原始数据行数: {len(rows)}") - - # 第一步:筛选指定年份的数据 - rows_year = [] - for row in rows: - if row and row[0].startswith(year): - rows_year.append(row) - - print(f"{year}年数据行数: {len(rows_year)}") - - # 第二步:分离退款和非退款条目 - # 微信的退款在"交易类型"列(index 1)包含"-退款" - refund_rows = [] # 退款记录 - expense_rows = [] # 支出记录 - income_rows = [] # 收入记录(转账收入等,保留) - - for row in rows_year: - if len(row) < 6: - continue - - transaction_type = row[1] # 交易类型 - income_expense = row[4] # 收/支 - - if "-退款" in transaction_type: - refund_rows.append(row) - elif income_expense == "支出": - expense_rows.append(row) - elif income_expense == "收入": - # 收入但不是退款(如转账收入),保留 - if "-退款" not in transaction_type: - income_rows.append(row) - - print(f"退款条目数: {len(refund_rows)}") - print(f"支出条目数: {len(expense_rows)}") - print(f"其他收入条目数: {len(income_rows)}") - - # 第三步:处理退款 - # 微信账单中,已退款的支出在"当前状态"列会标注 - final_expense_rows = [] - fully_refunded = 0 - partially_refunded = 0 - - for row in expense_rows: - status = row[7] # 当前状态 - original_amount = parse_amount(row[5]) - - if "已全额退款" in status: - # 全额退款,删除 - fully_refunded += 1 - print(f" 全额退款删除: {row[0]} | {row[2]} | {row[3][:25]}... 
| {row[5]}") - elif "已退款" in status: - # 部分退款,计算差额 - refund_amt = extract_refund_amount(status) - if refund_amt and refund_amt < original_amount: - remaining = original_amount - refund_amt - - # 创建新行并设置调整后的金额 - new_row = row.copy() - new_row[5] = f"¥{format_amount(remaining)}" - remark = f"原金额{row[5]},退款¥{refund_amt}" - - final_expense_rows.append((new_row, remark)) - partially_refunded += 1 - print(f" 部分退款: {row[0]} | {row[2]} | 原{row[5]} -> ¥{format_amount(remaining)}") - else: - # 无法解析退款金额,保留原记录 - final_expense_rows.append((row, None)) - else: - # 正常支出,保留 - final_expense_rows.append((row, None)) - - print(f"\n处理结果:") - print(f" 全额退款删除: {fully_refunded} 条") - print(f" 部分退款调整: {partially_refunded} 条") - print(f" 保留支出条目: {len(final_expense_rows)} 条") - print(f" 保留收入条目: {len(income_rows)} 条") - - # 转换为对齐格式 - aligned_expense_rows = [convert_row_to_aligned_format(r, remark) for r, remark in final_expense_rows] - aligned_income_rows = [convert_row_to_aligned_format(r, None) for r in income_rows] - - # 合并所有保留的记录并按时间排序 - final_rows = aligned_expense_rows + aligned_income_rows - final_rows.sort(key=lambda x: x[0], reverse=True) # 按时间倒序 - - print(f" 最终保留行数: {len(final_rows)}") - - # 写入清理后的数据(使用与支付宝对齐的表头) - with open(output_file, "w", encoding="utf-8", newline="") as f: - writer = csv.writer(f) - writer.writerow(ALIGNED_HEADER) - writer.writerows(final_rows) - - print(f"\n清理后的数据已保存到: {output_file}") - - # 统计支出总额 - total = Decimal("0") - for row in aligned_expense_rows: - total += Decimal(row[6]) - print(f"清理后支出总额: ¥{total}") - - # 按分类统计 - print("\n=== 按分类统计 ===") - categories = {} - for row in aligned_expense_rows: - cat = row[1] - amt = Decimal(row[6]) - categories[cat] = categories.get(cat, Decimal("0")) + amt - - for cat, amt in sorted(categories.items(), key=lambda x: -x[1]): - print(f" {cat}: ¥{amt}") - - -if __name__ == "__main__": - main() - diff --git a/data/微信支付账单_cleaned.csv b/data/微信支付账单_cleaned.csv deleted file mode 100644 index ab02635..0000000 --- 
a/data/微信支付账单_cleaned.csv +++ /dev/null @@ -1,21 +0,0 @@ -交易时间,交易分类,交易对方,对方账号,商品说明,收/支,金额,收/付款方式,交易状态,交易订单号,商家订单号,备注 -2026-01-07 12:14:45,餐饮美食,luckin coffee,/,订单付款,支出,10.60,零钱,支付成功,4200002988202601073306005534,10118632317055910925,/ -2026-01-07 09:11:03,交通出行,广州骑安,/,先乘车后付款,支出,1.50,零钱,支付成功,4200003006202601073238995173,_Ly46HgeRPbIUW9BHqr3fCxxxxj9Hxpx,/ -2026-01-07 09:06:40,交通出行,武汉金山软件有限公司,/,武汉通勤班车,支出,1.00,零钱,支付成功,4200003000202601074388517102,DD202601070906345062740,/ -2026-01-07 09:01:42,交通出行,滴滴出行,/,单车,支出,0.75,零钱,支付成功,4200003002202601079452390587,249_202601078530562602584152,/ -2026-01-06 18:28:04,交通出行,武汉金山软件有限公司,/,武汉通勤班车,支出,1.00,零钱,支付成功,4200002947202601067459746970,DD202601061827587329234,/ -2026-01-06 14:17:03,医疗健康,玲珑塔专攻皮肤病13175597736,/,收款方备注:二维码收款,支出,40.00,零钱,已转账,53110001163125202601063842270340,10001073012026010600280843892079,/ -2026-01-06 10:50:23,其他收入,卖WPS会员一张,非金山,/,转账备注:微信转账,收入,80.00,/,已存入零钱,1000050001202601061428125460454,/,/ -2026-01-05 18:28:38,交通出行,武汉金山软件有限公司,/,武汉通勤班车,支出,1.00,零钱,支付成功,4200002921202601058125516164,DD202601051828328051569,/ -2026-01-04 20:40:57,交通出行,京庐出行,/,京庐出行-共享光谷区-充值结束订单,支出,3.00,零钱,支付成功,4200002984202601048880226170,02222601047445289709466,/ -2026-01-04 19:24:34,日用百货,盒马,/,盒马鲜生246363,支出,54.60,招商银行信用卡(9640),支付成功,4200002923202601042813509099,11190600726010471499604680796,/ -2026-01-04 18:38:16,交通出行,武汉金山软件有限公司,/,武汉通勤班车,支出,1.00,零钱,支付成功,4200002929202601042008960408,DD202601041838109920170,/ -2026-01-03 20:24:05,餐饮美食,武汉金韵园区运营管理有限公司,/,食堂订餐(早餐送餐),支出,9.00,零钱,支付成功,4200002948202601030263477446,BL_FB20260103202359683744910,/ -2026-01-03 20:23:30,餐饮美食,武汉金韵园区运营管理有限公司,/,食堂订餐(早餐送餐),支出,8.00,零钱,支付成功,4200002943202601035305474758,BL_FB20260103202324698536250,/ -2026-01-03 20:22:42,餐饮美食,武汉金韵园区运营管理有限公司,/,食堂订餐(早餐送餐),支出,9.00,零钱,支付成功,4200002929202601036820484684,BL_FB20260103202236564150078,/ -2026-01-03 20:21:58,餐饮美食,武汉金韵园区运营管理有限公司,/,食堂订餐(早餐送餐),支出,11.00,零钱,已退款(¥1.00),4200003001202601035434733752,BL_FB20260103202152842042925,"原金额¥12.00,退款¥1.00" -2026-01-03 
20:21:26,餐饮美食,武汉金韵园区运营管理有限公司,/,食堂订餐(早餐送餐),支出,7.00,零钱,支付成功,4200002934202601036160923374,BL_FB20260103202119987137106,/ -2026-01-02 22:32:23,餐饮美食,武汉茶悦德饮品有限公司,/,武汉茶悦德饮品有限公司-消费,支出,14.00,零钱,支付成功,4200003009202601023794120565,260102185077059584-094886,/ -2026-01-02 15:43:56,运动健身,众雪Popsnowboard,/,转账备注:微信转账,支出,90.00,招商银行储蓄卡(3717),对方已收钱,53010002360217202601024108556496,1000050001202601021224301657619,"滑雪手套,原金额190.00,退款100.00" -2026-01-02 14:00:55,日用百货,北京京邦达贸易有限公司,/,京邦达四店_10021002601021358360705904325443_PJFJDX046848955934POS871788236953,支出,47.52,招商银行信用卡(9640),支付成功,4200002951202601028043023620,202601021463830050637166,/ -2026-01-02 12:18:07,日用百货,沃尔玛,/,WMDJ,支出,12.29,招商银行信用卡(9640),支付成功,4200002957202601026022508013,SJ610020260102000000040685507,/ diff --git a/data/支付宝交易明细_cleaned.csv b/data/支付宝交易明细_cleaned.csv deleted file mode 100644 index a741dc9..0000000 --- a/data/支付宝交易明细_cleaned.csv +++ /dev/null @@ -1,38 +0,0 @@ -交易时间,交易分类,交易对方,对方账号,商品说明,收/支,金额,收/付款方式,交易状态,交易订单号,商家订单号,备注, -2026-01-07 12:01:02,餐饮美食,武汉金韵园区运营管理有限公司,zha***@kingsoft.com,金山武汉食堂-烧腊:消费(扫码),支出,23.80,招商银行信用卡(9640),交易成功,2026010722001496171406648091 ,532755217159032832 ,, -2026-01-06 15:54:53,餐饮美食,友宝,sma***@ubox.cn,智能货柜消费_香巴佬酱香腿90g_消费时间:2026-01-06 15:53:38,支出,7.19,招商银行信用卡(9640),交易成功,2026010622001496171401693010 ,visionpayF57BD4F10C5814B8A201BC9D ,, -2026-01-06 11:55:10,餐饮美食,武汉金韵园区运营管理有限公司,zha***@kingsoft.com,金山武汉食堂-小碗菜餐线总:消费(扫码),支出,12.00,招商银行信用卡(9640),交易成功,2026010622001496171404821470 ,532391356409057280 ,, -2026-01-06 09:35:09,交通出行,高德打车,aut***@autonavi.com,高德打车订单,支出,16.09,招商银行信用卡(9640),交易成功,2026010622001496171401601675 ,0003N202601060000000013917689677 ,, -2026-01-05 18:59:11,餐饮美食,板栗,131******35,收钱码收款,支出,21.00,花呗,交易成功,2026010522001496171458859720 ,47676107513522013796171 ,, -2026-01-05 18:22:34,日用百货,武汉市金山便利店,/,立码收收款,支出,40.69,招商银行信用卡(9640)&红包,交易成功,2026010522001496171456603450 ,67126600004826000020260105Nf0000000006000630042410 ,, -2026-01-05 
15:16:38,充值缴费,武汉供电公司,/,电费自动缴费-旭辉千山凌*-根据每期出账后自动缴费,支出,50.00,招商银行信用卡(9640),交易成功,2026010500003001170072253280 ,4219923167220 ,, -2026-01-05 13:49:13,餐饮美食,友宝,sma***@ubox.cn,智能货柜消费_维他柠檬茶250ml_消费时间:2026-01-05 13:48:20,支出,2.40,招商银行信用卡(9640),交易成功,2026010522001496171456645488 ,visionpayF57B487D159714B73323C474 ,, -2026-01-05 11:59:45,餐饮美食,武汉金韵园区运营管理有限公司,zha***@kingsoft.com,金山武汉食堂-小碗菜餐线总:消费(扫码),支出,9.00,招商银行信用卡(9640),交易成功,2026010522001496171456645431 ,532030121989640192 ,, -2026-01-05 09:36:44,交通出行,高德打车,aut***@autonavi.com,高德打车订单,支出,13.43,招商银行信用卡(9640),交易成功,2026010522001496171454848711 ,0003N202601050000000013904811300 ,, -2026-01-04 19:27:50,日用百货,福州朴朴电子商务有限公司,pay***@pupumall.com,朴朴商品订单,支出,52.77,招商银行信用卡(9640),交易成功,2026010422001496171451587426 ,0550767526070159PAY01 ,, -2026-01-04 17:06:22,餐饮美食,友宝,sma***@ubox.cn,智能货柜消费_潘岭香辣鸭翅根50g_消费时间:2026-01-04 17:06:07,支出,2.55,招商银行信用卡(9640),交易成功,2026010422001496171452135216 ,visionpayF57BD4F21A3814B60FFEAC75 ,, -2026-01-04 12:03:39,餐饮美食,武汉金韵园区运营管理有限公司,zha***@kingsoft.com,金山武汉食堂-烧腊:消费(扫码),支出,23.80,招商银行信用卡(9640),交易成功,2026010422001496171448457889 ,531668714131558400 ,, -2026-01-04 09:29:28,交通出行,高德打车,aut***@autonavi.com,高德打车订单,支出,22.86,招商银行信用卡(9640),交易成功,2026010422001496171451555302 ,0003N202601040000000013892625199 ,, -2026-01-04 09:04:05,餐饮美食,巴比鲜包,159******86,收钱码收款,支出,8.00,花呗,交易成功,2026010422001496171452803795 ,17674886453522013796171 ,, -2026-01-03 22:30:01,餐饮美食,美团,it_***@meituan.com,长沙臭豆腐(光谷店)-美团App-26010311100400001305933027499233,支出,20.88,招商银行信用卡(9640),交易成功,2026010322001496171449230231 ,20260103222958U94178426471674160 ,, -2026-01-03 17:04:37,家居家装,易安**士,293***@qq.com,A面色织水洗棉提花B版牛奶绒灰色四件套1.5m1.8秋冬舒适保暖北欧,支出,156.35,招商银行信用卡(9640),支付成功,20260103300000016996171253260649 ,T200P4964518946301689607 ,, -2026-01-03 13:44:03,餐饮美食,淘宝闪购,e50***@alibaba-inc.com,必胜客(光谷天地店)外卖订单,支出,55.00,招商银行信用卡(9640),交易成功,2026010322001196171449294735 ,13130600726010398453925283122 ,, -2026-01-03 10:16:31,充值缴费,湖北联通,/,手机充值,支出,50.00,招商银行信用卡(9640),充值成功,2026010300003001170071845503 
,Q0212525731991767406591 ,, -2026-01-03 00:17:12,交通出行,高德打车,aut***@autonavi.com,高德打车订单,支出,17.45,招商银行信用卡(9640),交易成功,2026010322001496171445541739 ,0003N202601030000000013847133090 ,, -2026-01-02 21:29:15,交通出行,高德打车,aut***@autonavi.com,高德打车订单,支出,20.65,招商银行信用卡(9640),交易成功,2026010222001496171444119944 ,0003N202601020000000013845028209 ,, -2026-01-02 15:39:08,交通出行,高德打车,aut***@autonavi.com,高德打车订单,支出,12.61,招商银行信用卡(9640),交易成功,2026010222001496171445120735 ,0003N202601020000000013865875701 ,, -2026-01-02 13:30:02,充值缴费,武汉市燃气集团有限公司,/,燃气费-*亮召,支出,300.00,招商银行信用卡(9640),交易成功,2026010200003001170071685230 ,6000332491 ,, -2026-01-02 12:06:04,餐饮美食,淘宝闪购,e50***@alibaba-inc.com,食寨香木甑饭(保利时代店)外卖订单,支出,17.38,招商银行信用卡(9640),交易成功,2026010222001196171441902610 ,13120600726010244642013283122 ,, -2026-01-02 12:04:27,运动健身,上海携程国际旅行社有限公司,xcg***@trip.com,武汉冰雪中心,支出,390.00,招商银行信用卡(9640),交易成功,2026010222001496171442735189 ,20260102ALPP000146956007 ,, -2026-01-02 11:05:33,充值缴费,中国移动,z97***@service.aliyun.com,为15927473526话费充值,支出,50.00,招商银行信用卡(9640),交易成功,2026010222001496171441484092 ,2026010200003100001766469371 ,, -2026-01-02 01:46:12,充值缴费,中国移动,zyd***@163.com,话费自动充值,支出,50.00,招商银行信用卡(9640),交易成功,2026010222001496171440772361 ,00952026010201461183897688099526 ,, -2026-01-01 21:42:18,文化休闲,雷神,fin***@leigod.com,超级会员-游戏修复大师工具,支出,88.00,招商银行信用卡(9640),交易成功,2026010122001496171440240373 ,0108-2026010121415753706110 ,, -2026-01-01 20:44:27,餐饮美食,美团,it_***@meituan.com,茶百道(武汉工程大学流芳校区店)-美团App-26010111100400001305571746839233,支出,6.85,招商银行信用卡(9640),交易成功,2026010122001496171437606703 ,20260101204424U80761740985734491 ,, -2026-01-01 20:42:34,餐饮美食,美团,it_***@meituan.com,南膳房北京烤鸭(光谷天地店)-美团App-26010111100400001305571194670233,支出,20.20,招商银行信用卡(9640),交易成功,2026010122001496171438917791 ,20260101204230U76025028895654959 ,, -2026-01-01 15:38:40,餐饮美食,淘宝闪购,e50***@alibaba-inc.com,米已成粥(软件园推荐必吃店)外卖订单,支出,19.90,招商银行信用卡(9640),交易成功,2026010122001196171435765243 ,13150600726010129328742283122 ,, -2026-01-01 
14:58:20,家居家装,md**t,420***@qq.com,山复尔尔|ins盐系条纹全棉四件套中性黑白灰色系纯棉三件套1.8笠,支出,137.85,招商银行信用卡(9640),支付成功,20260101300000016096171232965838 ,T200P4964518946302689607 ,, -2026-01-01 14:57:26,数码电器,天**,tmc***@service.aliyun.com,闪魔适用iPhone17Promax钢化膜15苹果16Pro手机膜13/14全屏无尘仓,支出,22.24,招商银行信用卡(9640),等待确认收货,2026010122001196171439634715 ,T200P4982324509161689607 ,, -2026-01-01 14:29:47,文化休闲,广东南方新媒体股份有限公司,136***@qq.com,超级大会员连续包月,支出,25.00,招商银行信用卡(9640),交易成功,2026010122001496171436637054 ,CA20260101142928540127402047 ,, -2026-01-01 06:26:42,日用百货,福州**),c2m***@service.aliyun.com,【45双±3双】一次性天然竹筷子家用快餐碗筷外卖独立包装卫生,支出,0.34,招商银行信用卡(9640),支付成功,20260101300000012696171229655819 ,T200P4960300320533689607 ,, -2026-01-01 06:26:41,日用百货,里米**司,c2m***@service.aliyun.com,超细高拉力高分子细滑牙线棒一次性牙签牙线便携牙线盒牙线签剔牙,支出,0.33,招商银行信用卡(9640),支付成功,20260101300000012696171234753461 ,T200P4960300320535689607 ,, -2026-01-01 06:26:40,日用百货,美鑫**司,c2m***@service.aliyun.com,冰箱除味剂活性炭清洁除臭清新去除异味家用专用除味盒清洗剂神器,支出,0.33,招商银行信用卡(9640),支付成功,20260101300000012696171235335901 ,T200P4960300320534689607 ,, diff --git a/server/config.yaml b/server/config.yaml new file mode 100644 index 0000000..29479b2 --- /dev/null +++ b/server/config.yaml @@ -0,0 +1,18 @@ +# BillAI 服务器配置文件 + +# 服务配置 +server: + port: 8080 + +# Python 配置 +python: + # Python 解释器路径(相对于项目根目录或绝对路径) + path: analyzer/venv/bin/python + # 分析脚本路径(相对于项目根目录) + script: analyzer/clean_bill.py + +# 文件目录配置(相对于项目根目录) +directories: + upload: server/uploads + output: server/outputs + diff --git a/server/config/config.go b/server/config/config.go new file mode 100644 index 0000000..286df02 --- /dev/null +++ b/server/config/config.go @@ -0,0 +1,152 @@ +package config + +import ( + "flag" + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v3" +) + +// Config 服务配置 +type Config struct { + Port string // 服务端口 + ProjectRoot string // 项目根目录 + PythonPath string // Python 解释器路径 + CleanScript string // 清理脚本路径 + UploadDir string // 上传文件目录 + OutputDir string // 输出文件目录 +} + +// configFile YAML 配置文件结构 +type configFile 
struct { + Server struct { + Port int `yaml:"port"` + } `yaml:"server"` + Python struct { + Path string `yaml:"path"` + Script string `yaml:"script"` + } `yaml:"python"` + Directories struct { + Upload string `yaml:"upload"` + Output string `yaml:"output"` + } `yaml:"directories"` +} + +// Global 全局配置实例 +var Global Config + +// getEnvOrDefault 获取环境变量,如果不存在则返回默认值 +func getEnvOrDefault(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue +} + +// getDefaultProjectRoot 获取默认项目根目录 +func getDefaultProjectRoot() string { + if root := os.Getenv("BILLAI_ROOT"); root != "" { + return root + } + exe, err := os.Executable() + if err == nil { + exeDir := filepath.Dir(exe) + if filepath.Base(exeDir) == "server" { + return filepath.Dir(exeDir) + } + } + cwd, _ := os.Getwd() + if filepath.Base(cwd) == "server" { + return filepath.Dir(cwd) + } + return cwd +} + +// getDefaultPythonPath 获取默认 Python 路径 +func getDefaultPythonPath() string { + if python := os.Getenv("BILLAI_PYTHON"); python != "" { + return python + } + return "analyzer/venv/bin/python" +} + +// loadConfigFile 加载 YAML 配置文件 +func loadConfigFile(configPath string) *configFile { + data, err := os.ReadFile(configPath) + if err != nil { + return nil + } + + var cfg configFile + if err := yaml.Unmarshal(data, &cfg); err != nil { + fmt.Printf("⚠️ 配置文件解析失败: %v\n", err) + return nil + } + + return &cfg +} + +// Load 加载配置 +func Load() { + var configFilePath string + flag.StringVar(&configFilePath, "config", "config.yaml", "配置文件路径") + flag.Parse() + + // 设置默认值 + Global.Port = getEnvOrDefault("PORT", "8080") + Global.ProjectRoot = getDefaultProjectRoot() + Global.PythonPath = getDefaultPythonPath() + Global.CleanScript = "analyzer/clean_bill.py" + Global.UploadDir = "server/uploads" + Global.OutputDir = "server/outputs" + + // 查找配置文件 + configPath := configFilePath + if !filepath.IsAbs(configPath) { + if _, err := os.Stat(configPath); os.IsNotExist(err) { + 
configPath = filepath.Join("server", configFilePath) + } + } + + // 加载配置文件 + if cfg := loadConfigFile(configPath); cfg != nil { + fmt.Printf("📄 加载配置文件: %s\n", configPath) + if cfg.Server.Port > 0 { + Global.Port = fmt.Sprintf("%d", cfg.Server.Port) + } + if cfg.Python.Path != "" { + Global.PythonPath = cfg.Python.Path + } + if cfg.Python.Script != "" { + Global.CleanScript = cfg.Python.Script + } + if cfg.Directories.Upload != "" { + Global.UploadDir = cfg.Directories.Upload + } + if cfg.Directories.Output != "" { + Global.OutputDir = cfg.Directories.Output + } + } + + // 环境变量覆盖 + if port := os.Getenv("PORT"); port != "" { + Global.Port = port + } + if python := os.Getenv("BILLAI_PYTHON"); python != "" { + Global.PythonPath = python + } + if root := os.Getenv("BILLAI_ROOT"); root != "" { + Global.ProjectRoot = root + } +} + +// ResolvePath 解析路径(相对路径转为绝对路径) +func ResolvePath(path string) string { + if filepath.IsAbs(path) { + return path + } + return filepath.Join(Global.ProjectRoot, path) +} + diff --git a/server/go.mod b/server/go.mod new file mode 100644 index 0000000..014ab4d --- /dev/null +++ b/server/go.mod @@ -0,0 +1,34 @@ +module billai-server + +go 1.21 + +require ( + github.com/gin-gonic/gin v1.9.1 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + github.com/bytedance/sonic v1.9.1 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect + github.com/gin-contrib/sse v0.1.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect + github.com/goccy/go-json v0.10.2 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect + github.com/leodido/go-urn v1.2.4 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd 
// indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect + golang.org/x/arch v0.3.0 // indirect + golang.org/x/crypto v0.9.0 // indirect + golang.org/x/net v0.10.0 // indirect + golang.org/x/sys v0.8.0 // indirect + golang.org/x/text v0.9.0 // indirect + google.golang.org/protobuf v1.30.0 // indirect +) diff --git a/server/go.sum b/server/go.sum new file mode 100644 index 0000000..1a77fa1 --- /dev/null +++ b/server/go.sum @@ -0,0 +1,86 @@ +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= +github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin v1.9.1/go.mod 
h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= 
+github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= 
+github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g= +golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0= +golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod 
h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/server/handler/review.go b/server/handler/review.go new file mode 100644 index 0000000..3fdc210 --- /dev/null +++ b/server/handler/review.go @@ -0,0 +1,72 @@ +package handler + +import ( + "net/http" + "os" + "path/filepath" + "strings" + + "github.com/gin-gonic/gin" + + "billai-server/config" + "billai-server/model" + "billai-server/service" +) + +// Review 获取需要复核的记录 +func Review(c *gin.Context) { + // 获取文件名参数 + fileName := c.Query("file") + if fileName == "" { + c.JSON(http.StatusBadRequest, model.ReviewResponse{ + Result: false, + Message: "请提供文件名参数 (file)", + }) + return + } + + // 构建文件路径 + outputDirAbs := config.ResolvePath(config.Global.OutputDir) + filePath := filepath.Join(outputDirAbs, fileName) + + // 检查文件是否存在 + if _, err := os.Stat(filePath); os.IsNotExist(err) { + c.JSON(http.StatusNotFound, model.ReviewResponse{ + Result: false, + Message: "文件不存在: " + fileName, + }) + return + } + + // 判断文件格式 + format := "csv" + if strings.HasSuffix(fileName, ".json") { + format = "json" + } + + // 提取需要复核的记录 + records := service.ExtractNeedsReview(filePath, format) + + // 统计高低优先级数量 + highCount := 0 + lowCount := 0 + for _, r := range records { + if r.ReviewLevel == "HIGH" { + highCount++ + } else if r.ReviewLevel == "LOW" { + lowCount++ + } + } + + c.JSON(http.StatusOK, model.ReviewResponse{ + Result: true, + Message: "获取成功", + Data: &model.ReviewData{ + Total: 
len(records), + High: highCount, + Low: lowCount, + Records: records, + }, + }) +} + diff --git a/server/handler/upload.go b/server/handler/upload.go new file mode 100644 index 0000000..87d19c9 --- /dev/null +++ b/server/handler/upload.go @@ -0,0 +1,119 @@ +package handler + +import ( + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/gin-gonic/gin" + + "billai-server/config" + "billai-server/model" +) + +// Upload 处理账单上传和清理请求 +func Upload(c *gin.Context) { + // 1. 获取上传的文件 + file, header, err := c.Request.FormFile("file") + if err != nil { + c.JSON(http.StatusBadRequest, model.UploadResponse{ + Result: false, + Message: "请上传账单文件 (参数名: file)", + }) + return + } + defer file.Close() + + // 2. 解析请求参数 + var req model.UploadRequest + c.ShouldBind(&req) + if req.Format == "" { + req.Format = "csv" + } + + // 3. 保存上传的文件 + timestamp := time.Now().Format("20060102_150405") + inputFileName := fmt.Sprintf("%s_%s", timestamp, header.Filename) + uploadDirAbs := config.ResolvePath(config.Global.UploadDir) + inputPath := filepath.Join(uploadDirAbs, inputFileName) + + dst, err := os.Create(inputPath) + if err != nil { + c.JSON(http.StatusInternalServerError, model.UploadResponse{ + Result: false, + Message: "保存文件失败: " + err.Error(), + }) + return + } + defer dst.Close() + io.Copy(dst, file) + + // 4. 构建输出文件路径 + baseName := strings.TrimSuffix(header.Filename, filepath.Ext(header.Filename)) + outputExt := ".csv" + if req.Format == "json" { + outputExt = ".json" + } + outputFileName := fmt.Sprintf("%s_%s_cleaned%s", timestamp, baseName, outputExt) + outputDirAbs := config.ResolvePath(config.Global.OutputDir) + outputPath := filepath.Join(outputDirAbs, outputFileName) + + // 5. 
构建命令参数 + cleanScriptAbs := config.ResolvePath(config.Global.CleanScript) + args := []string{cleanScriptAbs, inputPath, outputPath} + if req.Year != "" { + args = append(args, "--year", req.Year) + } + if req.Month != "" { + args = append(args, "--month", req.Month) + } + if req.Start != "" { + args = append(args, "--start", req.Start) + } + if req.End != "" { + args = append(args, "--end", req.End) + } + if req.Format != "" { + args = append(args, "--format", req.Format) + } + + // 6. 执行 Python 脚本 + pythonPathAbs := config.ResolvePath(config.Global.PythonPath) + cmd := exec.Command(pythonPathAbs, args...) + cmd.Dir = config.Global.ProjectRoot + output, err := cmd.CombinedOutput() + outputStr := string(output) + + if err != nil { + c.JSON(http.StatusInternalServerError, model.UploadResponse{ + Result: false, + Message: "处理失败: " + err.Error(), + }) + return + } + + // 7. 检测账单类型 + billType := "" + if strings.Contains(outputStr, "支付宝") { + billType = "alipay" + } else if strings.Contains(outputStr, "微信") { + billType = "wechat" + } + + // 8. 
返回成功响应 + c.JSON(http.StatusOK, model.UploadResponse{ + Result: true, + Message: "处理成功", + Data: &model.UploadData{ + BillType: billType, + FileURL: fmt.Sprintf("/download/%s", outputFileName), + FileName: outputFileName, + }, + }) +} + diff --git a/server/main.go b/server/main.go new file mode 100644 index 0000000..368d687 --- /dev/null +++ b/server/main.go @@ -0,0 +1,89 @@ +package main + +import ( + "fmt" + "net/http" + "os" + + "github.com/gin-gonic/gin" + + "billai-server/config" + "billai-server/handler" +) + +func main() { + // 加载配置 + config.Load() + + // 解析路径 + uploadDirAbs := config.ResolvePath(config.Global.UploadDir) + outputDirAbs := config.ResolvePath(config.Global.OutputDir) + pythonPathAbs := config.ResolvePath(config.Global.PythonPath) + + // 确保目录存在 + os.MkdirAll(uploadDirAbs, 0755) + os.MkdirAll(outputDirAbs, 0755) + + // 打印配置信息 + printBanner(pythonPathAbs, uploadDirAbs, outputDirAbs) + + // 检查 Python 是否存在 + if _, err := os.Stat(pythonPathAbs); os.IsNotExist(err) { + fmt.Printf("⚠️ 警告: Python 路径不存在: %s\n", pythonPathAbs) + fmt.Println(" 请在配置文件中指定正确的 Python 路径") + } + + // 创建路由 + r := gin.Default() + + // 注册路由 + setupRoutes(r, outputDirAbs, pythonPathAbs) + + // 启动服务 + printAPIInfo() + r.Run(":" + config.Global.Port) +} + +// setupRoutes 设置路由 +func setupRoutes(r *gin.Engine, outputDirAbs, pythonPathAbs string) { + // 健康检查 + r.GET("/health", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{ + "status": "ok", + "python_path": pythonPathAbs, + }) + }) + + // API 路由 + api := r.Group("/api") + { + api.POST("/upload", handler.Upload) + api.GET("/review", handler.Review) + } + + // 静态文件下载 + r.Static("/download", outputDirAbs) +} + +// printBanner 打印启动横幅 +func printBanner(pythonPath, uploadDir, outputDir string) { + fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") + fmt.Println("📦 BillAI 账单分析服务") + fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") + fmt.Printf("📁 项目根目录: %s\n", config.Global.ProjectRoot) + fmt.Printf("🐍 Python路径: %s\n", 
pythonPath) + fmt.Printf("📂 上传目录: %s\n", uploadDir) + fmt.Printf("📂 输出目录: %s\n", outputDir) + fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") +} + +// printAPIInfo 打印 API 信息 +func printAPIInfo() { + fmt.Printf("\n🚀 服务已启动: http://localhost:%s\n", config.Global.Port) + fmt.Println("📝 API 接口:") + fmt.Println(" POST /api/upload - 上传并分析账单") + fmt.Println(" GET /api/review - 获取需要复核的记录") + fmt.Println(" GET /download/* - 下载结果文件") + fmt.Println(" GET /health - 健康检查") + fmt.Println() +} diff --git a/server/model/request.go b/server/model/request.go new file mode 100644 index 0000000..031562e --- /dev/null +++ b/server/model/request.go @@ -0,0 +1,11 @@ +package model + +// UploadRequest 上传请求参数 +type UploadRequest struct { + Year string `form:"year"` // 年份筛选 + Month string `form:"month"` // 月份筛选 + Start string `form:"start"` // 起始日期 + End string `form:"end"` // 结束日期 + Format string `form:"format"` // 输出格式: csv/json +} + diff --git a/server/model/response.go b/server/model/response.go new file mode 100644 index 0000000..11b806f --- /dev/null +++ b/server/model/response.go @@ -0,0 +1,43 @@ +package model + +// UploadData 上传响应数据 +type UploadData struct { + BillType string `json:"bill_type,omitempty"` // alipay/wechat + FileURL string `json:"file_url,omitempty"` // 下载链接 + FileName string `json:"file_name,omitempty"` // 文件名 +} + +// UploadResponse 上传响应 +type UploadResponse struct { + Result bool `json:"result"` + Message string `json:"message"` + Data *UploadData `json:"data,omitempty"` +} + +// ReviewRecord 需要复核的记录 +type ReviewRecord struct { + Time string `json:"time"` // 交易时间 + Category string `json:"category"` // 交易分类 + Merchant string `json:"merchant"` // 交易对方 + Description string `json:"description"` // 商品说明 + IncomeExpense string `json:"income_expense"` // 收/支 + Amount string `json:"amount"` // 金额 + Remark string `json:"remark"` // 备注 + ReviewLevel string `json:"review_level"` // 复核等级: HIGH/LOW +} + +// ReviewData 复核响应数据 +type ReviewData struct { + Total int 
`json:"total"` // 总数 + High int `json:"high"` // 高优先级数量 + Low int `json:"low"` // 低优先级数量 + Records []ReviewRecord `json:"records,omitempty"` // 需要复核的记录 +} + +// ReviewResponse 复核记录响应 +type ReviewResponse struct { + Result bool `json:"result"` + Message string `json:"message"` + Data *ReviewData `json:"data,omitempty"` +} + diff --git a/server/service/extractor.go b/server/service/extractor.go new file mode 100644 index 0000000..7e3d263 --- /dev/null +++ b/server/service/extractor.go @@ -0,0 +1,134 @@ +package service + +import ( + "encoding/csv" + "encoding/json" + "os" + + "billai-server/model" +) + +// ExtractNeedsReview 从输出文件中提取需要复核的记录 +func ExtractNeedsReview(filePath string, format string) []model.ReviewRecord { + if format == "json" { + return extractFromJSON(filePath) + } + return extractFromCSV(filePath) +} + +// extractFromCSV 从 CSV 文件提取需要复核的记录 +func extractFromCSV(filePath string) []model.ReviewRecord { + var records []model.ReviewRecord + + file, err := os.Open(filePath) + if err != nil { + return records + } + defer file.Close() + + reader := csv.NewReader(file) + rows, err := reader.ReadAll() + if err != nil || len(rows) < 2 { + return records + } + + // 找到各列的索引 + header := rows[0] + colIdx := make(map[string]int) + for i, col := range header { + colIdx[col] = i + } + + reviewIdx, ok := colIdx["复核等级"] + if !ok { + return records + } + + // 提取需要复核的记录 + for _, row := range rows[1:] { + if len(row) > reviewIdx && (row[reviewIdx] == "HIGH" || row[reviewIdx] == "LOW") { + record := model.ReviewRecord{ + ReviewLevel: row[reviewIdx], + } + if idx, ok := colIdx["交易时间"]; ok && len(row) > idx { + record.Time = row[idx] + } + if idx, ok := colIdx["交易分类"]; ok && len(row) > idx { + record.Category = row[idx] + } + if idx, ok := colIdx["交易对方"]; ok && len(row) > idx { + record.Merchant = row[idx] + } + if idx, ok := colIdx["商品说明"]; ok && len(row) > idx { + record.Description = row[idx] + } + if idx, ok := colIdx["收/支"]; ok && len(row) > idx { + record.IncomeExpense = 
row[idx] + } + if idx, ok := colIdx["金额"]; ok && len(row) > idx { + record.Amount = row[idx] + } + if idx, ok := colIdx["备注"]; ok && len(row) > idx { + record.Remark = row[idx] + } + records = append(records, record) + } + } + + return records +} + +// extractFromJSON 从 JSON 文件提取需要复核的记录 +func extractFromJSON(filePath string) []model.ReviewRecord { + var records []model.ReviewRecord + + file, err := os.Open(filePath) + if err != nil { + return records + } + defer file.Close() + + var data []map[string]interface{} + decoder := json.NewDecoder(file) + if err := decoder.Decode(&data); err != nil { + return records + } + + for _, item := range data { + reviewLevel, ok := item["复核等级"].(string) + if !ok || (reviewLevel != "HIGH" && reviewLevel != "LOW") { + continue + } + + record := model.ReviewRecord{ + ReviewLevel: reviewLevel, + } + + if v, ok := item["交易时间"].(string); ok { + record.Time = v + } + if v, ok := item["交易分类"].(string); ok { + record.Category = v + } + if v, ok := item["交易对方"].(string); ok { + record.Merchant = v + } + if v, ok := item["商品说明"].(string); ok { + record.Description = v + } + if v, ok := item["收/支"].(string); ok { + record.IncomeExpense = v + } + if v, ok := item["金额"].(string); ok { + record.Amount = v + } + if v, ok := item["备注"].(string); ok { + record.Remark = v + } + + records = append(records, record) + } + + return records +} +