refactor: 重构项目结构
- 将 Python 代码移至 analyzer/ 目录(含 venv)
- 拆分 Go 服务器代码为模块化结构:
  - config/: 配置加载
  - model/: 请求/响应模型
  - service/: 业务逻辑
  - handler/: API 处理器
- 添加 .gitignore 文件
- 删除旧的独立脚本文件
This commit is contained in:
9
analyzer/cleaners/__init__.py
Normal file
9
analyzer/cleaners/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
账单清理模块
|
||||
"""
|
||||
from .base import BaseCleaner
|
||||
from .alipay import AlipayCleaner
|
||||
from .wechat import WechatCleaner
|
||||
|
||||
__all__ = ['BaseCleaner', 'AlipayCleaner', 'WechatCleaner']
|
||||
|
||||
231
analyzer/cleaners/alipay.py
Normal file
231
analyzer/cleaners/alipay.py
Normal file
@@ -0,0 +1,231 @@
|
||||
"""
|
||||
支付宝账单清理模块
|
||||
"""
|
||||
import csv
|
||||
from decimal import Decimal
|
||||
|
||||
from .base import (
|
||||
BaseCleaner, parse_amount, format_amount,
|
||||
is_in_date_range, create_arg_parser
|
||||
)
|
||||
from category import reclassify_if_needed, get_platform_merchants
|
||||
|
||||
|
||||
class AlipayCleaner(BaseCleaner):
    """Cleaner for Alipay transaction-detail CSV exports.

    ``clean`` filters rows by date, nets refunds against the purchases they
    belong to, re-categorises platform merchants and writes the result.

    Column indexes read by this class:
        0 transaction time, 1 category, 2 counterparty, 4 product description,
        5 income/expense flag, 6 amount, 9 order number,
        10 merchant order number, 11 remark.
    """

    def clean(self) -> None:
        """Run the whole cleaning pipeline and write the output file.

        Progress is printed to stdout and counters are stored in ``self.stats``.

        Raises:
            ValueError: If the input file contains no header row.
        """
        self.print_header()

        # Load the whole export into memory.
        with open(self.input_file, "r", encoding="utf-8") as f:
            reader = csv.reader(f)
            # A bare next() would leak StopIteration on an empty file.
            header = next(reader, None)
            if header is None:
                raise ValueError(f"输入文件为空,缺少表头: {self.input_file}")
            rows = list(reader)

        self.stats["original_count"] = len(rows)
        print(f"原始数据行数: {len(rows)}")

        # Step 1: keep non-empty rows whose timestamp (column 0) is in range.
        rows_filtered = [
            row for row in rows
            if row and is_in_date_range(row[0], self.start_date, self.end_date)
        ]
        self.stats["filtered_count"] = len(rows_filtered)

        date_desc = f"{self.start_date} ~ {self.end_date}" if self.start_date or self.end_date else "全部"
        print(f"筛选后数据行数: {len(rows_filtered)} ({date_desc})")

        # Step 2: split refund rows (category column equals "退款") from the rest.
        refund_rows = []
        expense_rows = []
        for row in rows_filtered:
            if len(row) > 1 and row[1] == "退款":
                refund_rows.append(row)
            else:
                expense_rows.append(row)

        print(f"退款条目数: {len(refund_rows)}")
        print(f"非退款条目数: {len(expense_rows)}")

        # Step 3: total the refunded amount per order.
        order_refunds = self._aggregate_refunds(refund_rows)
        print(f"有退款的订单数: {len(order_refunds)}")

        # Step 4: net the refunds against every remaining row.
        final_rows = self._process_expenses(expense_rows, order_refunds)

        print(f"\n处理结果:")
        print(f" 全额退款删除: {self.stats['fully_refunded']} 条")
        print(f" 部分退款调整: {self.stats['partially_refunded']} 条")
        print(f" 最终保留行数: {len(final_rows)}")

        # Step 5: re-categorise and add the review-level column.
        final_rows = self.reclassify(final_rows, header)

        if self.stats["category_adjusted"] > 0:
            print(f" 分类调整: {self.stats['category_adjusted']} 条")

        self.stats["final_count"] = len(final_rows)

        # Write the result in the configured output format.
        self.write_output(header, final_rows)

        print(f"\n清理后的数据已保存到: {self.output_file}")

    def _aggregate_refunds(self, refund_rows: list) -> dict:
        """Sum refunded amounts per order.

        Args:
            refund_rows: Rows already identified as refunds.

        Returns:
            Mapping of order key to the total refunded ``Decimal``. Rows with
            fewer than 11 columns or without any usable key are ignored.
        """
        order_refunds = {}

        for row in refund_rows:
            if len(row) < 11:
                continue

            refund_order_no = row[9].strip()
            refund_merchant_no = row[10].strip()
            refund_amount = parse_amount(row[6])

            # Only the part before the first "_" is used as the key
            # (presumably the original order number - confirm against real
            # exports); fall back to the merchant order number when empty.
            original_order = refund_order_no.split("_", 1)[0]
            key = original_order or refund_merchant_no
            if not key:
                continue

            order_refunds[key] = order_refunds.get(key, Decimal("0")) + refund_amount
            print(f" 退款记录: {row[0]} | {row[2]} | {refund_amount}元")

        return order_refunds

    def _process_expenses(self, expense_rows: list, order_refunds: dict) -> list:
        """Apply aggregated refunds to the non-refund rows.

        A row matches a refund key when its order number equals the key, its
        merchant order number equals the key, or its order number starts with
        the key; the first matching key in dict order wins.

        Args:
            expense_rows: Non-refund rows.
            order_refunds: Result of ``_aggregate_refunds``.

        Returns:
            Rows to keep. Fully refunded rows are dropped; partially refunded
            rows are copied with a reduced amount and an explanatory remark.
        """
        final_rows = []

        for row in expense_rows:
            if len(row) < 12:
                # Too short to hold order numbers and a remark: keep unchanged.
                final_rows.append(row)
                continue

            order_no = row[9].strip()
            merchant_no = row[10].strip()
            expense_amount = parse_amount(row[6])

            # Look for the refund total that belongs to this row.
            refund_amount = Decimal("0")
            matched_key = None
            for key, amount in order_refunds.items():
                if key and (order_no == key or merchant_no == key or order_no.startswith(key)):
                    refund_amount = amount
                    matched_key = key
                    break

            if not matched_key:
                final_rows.append(row)
                continue

            if refund_amount >= expense_amount:
                # Fully refunded: drop the row.
                self.stats["fully_refunded"] += 1
                print(f" 全额退款删除: {row[0]} | {row[2]} | {row[4][:25]}... | 原{expense_amount}元")
                continue

            # Partially refunded: keep the difference and record it in the remark.
            remaining = expense_amount - refund_amount
            new_row = row.copy()
            new_row[6] = format_amount(remaining)

            original_remark = new_row[11]
            new_row[11] = f"原金额{expense_amount}元,退款{refund_amount}元{';' + original_remark if original_remark else ''}"

            final_rows.append(new_row)
            self.stats["partially_refunded"] += 1
            print(f" 部分退款: {row[0]} | {row[2]} | 原{expense_amount}元 -> {format_amount(remaining)}元")

        return final_rows

    def _is_platform_merchant(self, merchant: str) -> bool:
        """Return True when any name from ``get_platform_merchants()`` occurs in ``merchant``."""
        platform_merchants = get_platform_merchants()
        return any(platform in merchant for platform in platform_merchants)

    def reclassify(self, rows: list, header: list | None = None) -> list:
        """Re-categorise platform-merchant rows and append a review-level cell.

        Only rows whose counterparty is a platform merchant are passed to
        ``reclassify_if_needed``; every other row keeps its Alipay category.
        Rows and ``header`` are modified in place.

        Review level written to the appended cell:
            ""     - no review needed
            "LOW"  - set when the classifier returns level 1
            "HIGH" - set when the classifier returns level 2

        Args:
            rows: Data rows; rows with fewer than 6 columns are left untouched.
            header: Header row. "复核等级" is appended if missing, and rows are
                padded so the review cell lines up with it. Optional so the
                method can also be called with rows only, as the abstract
                base-class signature allows; padding is skipped in that case.

        Returns:
            The same ``rows`` list.
        """
        # Add the review-level column to the header once.
        if header is not None and "复核等级" not in header:
            header.append("复核等级")

        review_low_count = 0
        review_high_count = 0

        for row in rows:
            if len(row) < 6:
                continue

            original_category = row[1]
            merchant = row[2]
            product = row[4]
            income_expense = row[5]

            review_mark = ""

            # Non-platform merchants keep the category Alipay assigned.
            if self._is_platform_merchant(merchant):
                new_category, changed, review_level = reclassify_if_needed(
                    original_category, merchant, product, income_expense
                )

                if changed:
                    row[1] = new_category
                    self.stats["category_adjusted"] += 1
                    print(f" 分类调整: {merchant[:15]}... | {original_category} -> {new_category}")

                if review_level == 1:
                    review_mark = "LOW"
                    review_low_count += 1
                elif review_level == 2:
                    review_mark = "HIGH"
                    review_high_count += 1

            # Pad short rows so the mark lands in the last header column
            # (a negative count multiplies to an empty list, i.e. no padding).
            if header is not None:
                row.extend([""] * (len(header) - 1 - len(row)))
            row.append(review_mark)

        if review_high_count > 0:
            print(f" 高优先级复核: {review_high_count} 条(无法判断)")
        if review_low_count > 0:
            print(f" 低优先级复核: {review_low_count} 条(分类已调整)")

        return rows
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the Alipay cleaner.

    Parses the shared command-line options, builds an ``AlipayCleaner``,
    applies the computed date range and runs the cleaning pipeline.
    """
    parser = create_arg_parser("清理支付宝交易明细数据")
    args = parser.parse_args()

    from .base import compute_date_range

    # Forward --format: it was parsed but never passed on, so the cleaner
    # always used its "csv" default and JSON output could not be selected.
    cleaner = AlipayCleaner(args.input_file, args.output_file, output_format=args.format)
    start_date, end_date = compute_date_range(args)
    cleaner.set_date_range(start_date, end_date)
    cleaner.clean()


if __name__ == "__main__":
    main()
|
||||
|
||||
240
analyzer/cleaners/base.py
Normal file
240
analyzer/cleaners/base.py
Normal file
@@ -0,0 +1,240 @@
|
||||
"""
|
||||
账单清理基类和公共工具函数
|
||||
"""
|
||||
import csv
|
||||
import json
|
||||
import argparse
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime, date, timedelta
|
||||
from decimal import Decimal, ROUND_HALF_UP
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 公共工具函数
|
||||
# =============================================================================
|
||||
|
||||
def parse_date(date_str: str) -> date:
    """Parse a calendar date written as ``YYYY-MM-DD`` or ``YYYY/MM/DD``.

    Args:
        date_str: Date text in one of the two supported layouts.

    Returns:
        The parsed ``datetime.date``.

    Raises:
        ValueError: If ``date_str`` matches neither layout.
    """
    for layout in ("%Y-%m-%d", "%Y/%m/%d"):
        try:
            moment = datetime.strptime(date_str, layout)
        except ValueError:
            # Not this layout; try the next one.
            pass
        else:
            return moment.date()
    raise ValueError(f"无法解析日期: {date_str},请使用 YYYY-MM-DD 格式")
|
||||
|
||||
|
||||
def parse_amount(amount_str: str) -> Decimal:
    """Parse an amount string into a ``Decimal``.

    Currency signs (half-width "¥" and full-width "¥"), thousands separators
    and spaces are removed before conversion.

    Args:
        amount_str: Amount text such as "¥12.50" or "1,234.56".

    Returns:
        The parsed amount, or ``Decimal("0")`` when the text is missing, is not
        a number, or is a non-finite value (NaN / Infinity).
    """
    try:
        cleaned = (
            amount_str.replace("¥", "")
            .replace("¥", "")
            .replace(",", "")
            .replace(" ", "")
            .strip()
        )
        value = Decimal(cleaned)
    except (ArithmeticError, ValueError, TypeError, AttributeError):
        # decimal.InvalidOperation derives from ArithmeticError; TypeError and
        # AttributeError cover non-string input such as None. Unlike a bare
        # except, this lets KeyboardInterrupt / SystemExit propagate.
        return Decimal("0")

    # NaN / Infinity convert successfully but raise on later comparisons.
    return value if value.is_finite() else Decimal("0")
|
||||
|
||||
|
||||
def format_amount(amount: Decimal) -> str:
    """Render an amount as text with exactly two decimal places.

    Ties are rounded half-up.

    Args:
        amount: The value to format.

    Returns:
        The rounded value as a string, e.g. "2.00".
    """
    cent = Decimal("0.01")
    rounded = amount.quantize(cent, rounding=ROUND_HALF_UP)
    return str(rounded)
|
||||
|
||||
|
||||
def compute_date_range(args) -> tuple[date | None, date | None]:
|
||||
"""
|
||||
根据参数计算最终的日期范围
|
||||
多重指定时取交集(最小范围)
|
||||
|
||||
Returns:
|
||||
(start_date, end_date) 或 (None, None) 表示不筛选
|
||||
"""
|
||||
start_date = None
|
||||
end_date = None
|
||||
|
||||
# 1. 根据年份设置范围
|
||||
if args.year:
|
||||
year = int(args.year)
|
||||
start_date = date(year, 1, 1)
|
||||
end_date = date(year, 12, 31)
|
||||
|
||||
# 2. 根据月份进一步收窄
|
||||
if args.month:
|
||||
month = int(args.month)
|
||||
year = int(args.year) if args.year else datetime.now().year
|
||||
|
||||
if not start_date:
|
||||
start_date = date(year, 1, 1)
|
||||
end_date = date(year, 12, 31)
|
||||
|
||||
month_start = date(year, month, 1)
|
||||
if month == 12:
|
||||
month_end = date(year, 12, 31)
|
||||
else:
|
||||
month_end = date(year, month + 1, 1) - timedelta(days=1)
|
||||
|
||||
start_date = max(start_date, month_start) if start_date else month_start
|
||||
end_date = min(end_date, month_end) if end_date else month_end
|
||||
|
||||
# 3. 根据 start/end 参数进一步收窄
|
||||
if args.start:
|
||||
custom_start = parse_date(args.start)
|
||||
start_date = max(start_date, custom_start) if start_date else custom_start
|
||||
|
||||
if args.end:
|
||||
custom_end = parse_date(args.end)
|
||||
end_date = min(end_date, custom_end) if end_date else custom_end
|
||||
|
||||
return start_date, end_date
|
||||
|
||||
|
||||
def is_in_date_range(date_str: str, start_date: date | None, end_date: date | None) -> bool:
|
||||
"""检查日期字符串是否在指定范围内"""
|
||||
if start_date is None and end_date is None:
|
||||
return True
|
||||
|
||||
try:
|
||||
row_date = datetime.strptime(date_str[:10], "%Y-%m-%d").date()
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
if start_date and row_date < start_date:
|
||||
return False
|
||||
if end_date and row_date > end_date:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def create_arg_parser(description: str) -> argparse.ArgumentParser:
    """Build the command-line parser shared by the bill cleaners.

    Positional arguments: ``input_file`` and an optional ``output_file``.
    Options: ``--year/-y``, ``--month/-m`` (1-12), ``--start/-s``, ``--end/-e``
    and ``--format/-f`` ("csv" or "json", default "csv").

    Args:
        description: Text shown at the top of the help output.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # Printed verbatim below the option list (raw formatter keeps line breaks).
    usage_notes = """
日期筛选说明:
--year 指定年份(如 2026)
--month 指定月份(1-12)
--start 起始日期(YYYY-MM-DD)
--end 结束日期(YYYY-MM-DD)

多个条件同时指定时,取交集(最小日期范围)

输出格式:
--format 输出格式:csv(默认)或 json
"""

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes,
    )

    parser.add_argument("input_file", help="输入的账单CSV文件")
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="输出文件(默认为 输入文件名_cleaned.csv/json)",
    )

    # (flags, keyword settings) for each option, in help-output order.
    option_specs = (
        (("--year", "-y"), {"type": str, "default": None, "help": "保留的年份(如 2026)"}),
        (
            ("--month", "-m"),
            {"type": int, "choices": range(1, 13), "metavar": "1-12", "help": "保留的月份(1-12)"},
        ),
        (("--start", "-s"), {"type": str, "help": "起始日期(YYYY-MM-DD)"}),
        (("--end", "-e"), {"type": str, "help": "结束日期(YYYY-MM-DD)"}),
        (
            ("--format", "-f"),
            {"choices": ["csv", "json"], "default": "csv", "help": "输出格式:csv(默认)或 json"},
        ),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    return parser
|
||||
|
||||
|
||||
def get_output_file(input_file: str, output_file: str | None, output_format: str = "csv") -> str:
|
||||
"""获取输出文件路径"""
|
||||
if output_file:
|
||||
return output_file
|
||||
import os
|
||||
base_name = os.path.splitext(input_file)[0]
|
||||
ext = "json" if output_format == "json" else "csv"
|
||||
return f"{base_name}_cleaned.{ext}"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 账单清理基类
|
||||
# =============================================================================
|
||||
|
||||
class BaseCleaner(ABC):
|
||||
"""账单清理基类"""
|
||||
|
||||
def __init__(self, input_file: str, output_file: str | None = None, output_format: str = "csv"):
|
||||
self.input_file = input_file
|
||||
self.output_format = output_format
|
||||
self.output_file = get_output_file(input_file, output_file, output_format)
|
||||
self.start_date: date | None = None
|
||||
self.end_date: date | None = None
|
||||
|
||||
# 统计信息
|
||||
self.stats = {
|
||||
"original_count": 0,
|
||||
"filtered_count": 0,
|
||||
"fully_refunded": 0,
|
||||
"partially_refunded": 0,
|
||||
"category_adjusted": 0,
|
||||
"final_count": 0,
|
||||
}
|
||||
|
||||
def set_date_range(self, start_date: date | None, end_date: date | None):
|
||||
"""设置日期筛选范围"""
|
||||
self.start_date = start_date
|
||||
self.end_date = end_date
|
||||
|
||||
def print_header(self):
|
||||
"""打印处理头信息"""
|
||||
print(f"输入文件: {self.input_file}")
|
||||
print(f"输出文件: {self.output_file}")
|
||||
print(f"输出格式: {self.output_format.upper()}")
|
||||
if self.start_date or self.end_date:
|
||||
print(f"日期范围: {self.start_date or '不限'} ~ {self.end_date or '不限'}")
|
||||
else:
|
||||
print("日期范围: 全部")
|
||||
print()
|
||||
|
||||
def write_output(self, header: list, rows: list):
|
||||
"""
|
||||
写入输出文件(支持 CSV 和 JSON 格式)
|
||||
|
||||
Args:
|
||||
header: 表头列表
|
||||
rows: 数据行列表
|
||||
"""
|
||||
if self.output_format == "json":
|
||||
self._write_json(header, rows)
|
||||
else:
|
||||
self._write_csv(header, rows)
|
||||
|
||||
def _write_csv(self, header: list, rows: list):
|
||||
"""写入 CSV 格式"""
|
||||
with open(self.output_file, "w", encoding="utf-8", newline="") as f:
|
||||
writer = csv.writer(f)
|
||||
writer.writerow(header)
|
||||
writer.writerows(rows)
|
||||
|
||||
def _write_json(self, header: list, rows: list):
|
||||
"""写入 JSON 格式"""
|
||||
# 将每行转换为字典
|
||||
data = []
|
||||
for row in rows:
|
||||
record = {}
|
||||
for i, col in enumerate(header):
|
||||
if i < len(row):
|
||||
record[col] = row[i]
|
||||
else:
|
||||
record[col] = ""
|
||||
data.append(record)
|
||||
|
||||
with open(self.output_file, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, ensure_ascii=False, indent=2)
|
||||
|
||||
@abstractmethod
|
||||
def clean(self) -> None:
|
||||
"""执行清理,子类实现"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def reclassify(self, rows: list) -> list:
|
||||
"""
|
||||
重新分类(子类实现)
|
||||
|
||||
Args:
|
||||
rows: 待处理的数据行
|
||||
|
||||
Returns:
|
||||
处理后的数据行
|
||||
"""
|
||||
pass
|
||||
|
||||
288
analyzer/cleaners/wechat.py
Normal file
288
analyzer/cleaners/wechat.py
Normal file
@@ -0,0 +1,288 @@
|
||||
"""
|
||||
微信账单清理模块
|
||||
"""
|
||||
import csv
|
||||
import re
|
||||
from decimal import Decimal
|
||||
|
||||
from .base import (
|
||||
BaseCleaner, parse_amount, format_amount,
|
||||
is_in_date_range, create_arg_parser
|
||||
)
|
||||
from category import infer_category
|
||||
|
||||
|
||||
# Output header aligned with the Alipay layout; the last column is the
# review-level field.
ALIGNED_HEADER = [
    "交易时间",
    "交易分类",
    "交易对方",
    "对方账号",
    "商品说明",
    "收/支",
    "金额",
    "收/付款方式",
    "交易状态",
    "交易订单号",
    "商家订单号",
    "备注",
    "复核等级",
]
|
||||
|
||||
|
||||
class WechatCleaner(BaseCleaner):
    """Cleaner for WeChat Pay bill CSV exports.

    ``clean`` filters rows by date, removes or reduces refunded expenses,
    converts the remaining rows to the ``ALIGNED_HEADER`` layout and writes
    them out.

    Raw WeChat columns read by this class:
        0 transaction time, 1 transaction type, 2 counterparty, 3 product,
        4 income/expense flag, 5 amount, 6 payment method, 7 current status,
        8 transaction number, 9 merchant order number, 10 remark.
    """

    # Rows must reach index 8 (transaction number) to be convertible.
    _MIN_COLUMNS = 9

    # "已退款" followed by an optional ASCII or full-width "(", an optional
    # half-width or full-width yen sign, and a well-formed number.
    _REFUND_AMOUNT_RE = re.compile(r"已退款[((]?\s*[¥¥]?\s*(\d+(?:\.\d+)?|\.\d+)")

    def clean(self) -> None:
        """Run the whole cleaning pipeline and write the output file.

        Progress is printed to stdout and counters are stored in ``self.stats``.

        Raises:
            ValueError: If the input file contains no header row.
        """
        self.print_header()

        # Load the whole export; the source header is skipped because the
        # output always uses ALIGNED_HEADER.
        with open(self.input_file, "r", encoding="utf-8") as f:
            reader = csv.reader(f)
            # A bare next() would leak StopIteration on an empty file.
            if next(reader, None) is None:
                raise ValueError(f"输入文件为空,缺少表头: {self.input_file}")
            rows = list(reader)

        self.stats["original_count"] = len(rows)
        print(f"原始数据行数: {len(rows)}")

        # Step 1: keep non-empty rows whose timestamp (column 0) is in range.
        rows_filtered = [
            row for row in rows
            if row and is_in_date_range(row[0], self.start_date, self.end_date)
        ]
        self.stats["filtered_count"] = len(rows_filtered)

        date_desc = f"{self.start_date} ~ {self.end_date}" if self.start_date or self.end_date else "全部"
        print(f"筛选后数据行数: {len(rows_filtered)} ({date_desc})")

        # Step 2: split into refunds, expenses and other income.
        refund_rows, expense_rows, income_rows = self._separate_rows(rows_filtered)

        print(f"退款条目数: {len(refund_rows)}")
        print(f"支出条目数: {len(expense_rows)}")
        print(f"其他收入条目数: {len(income_rows)}")

        # Step 3: apply status-based and transfer-based refunds.
        final_expense_rows, income_rows = self._process_refunds(expense_rows, income_rows)

        print(f"\n处理结果:")
        print(f" 全额退款删除: {self.stats['fully_refunded']} 条")
        print(f" 部分退款调整: {self.stats['partially_refunded']} 条")
        print(f" 保留支出条目: {len(final_expense_rows)} 条")
        print(f" 保留收入条目: {len(income_rows)} 条")

        # Step 4: convert to the aligned layout and categorise.
        aligned_expense = [self._convert_and_reclassify(r, remark) for r, remark in final_expense_rows]
        aligned_income = [self._convert_and_reclassify((r, None), None) for r in income_rows]

        # Merge and sort newest first (string comparison of the timestamp).
        final_rows = aligned_expense + aligned_income
        final_rows.sort(key=lambda x: x[0], reverse=True)

        # Count rows flagged for manual review.
        review_high_count = sum(1 for row in final_rows if row[-1] == "HIGH")

        self.stats["final_count"] = len(final_rows)
        print(f" 最终保留行数: {len(final_rows)}")
        if review_high_count > 0:
            print(f" 高优先级复核: {review_high_count} 条(无法判断)")

        # Write the result in the configured output format.
        self.write_output(ALIGNED_HEADER, final_rows)

        print(f"\n清理后的数据已保存到: {self.output_file}")

        # Print the expense totals.
        self._print_expense_summary(aligned_expense)

    def _separate_rows(self, rows: list) -> tuple[list, list, list]:
        """Split rows into refund, expense and income lists.

        Rows with fewer than nine columns are skipped: later steps index up to
        column 8 and would raise ``IndexError`` on them. Rows whose
        income/expense flag is neither "支出" nor "收入" are dropped.

        Returns:
            ``(refund_rows, expense_rows, income_rows)``.
        """
        refund_rows = []
        expense_rows = []
        income_rows = []

        for row in rows:
            if len(row) < self._MIN_COLUMNS:
                continue

            transaction_type = row[1]
            income_expense = row[4]

            if "-退款" in transaction_type:
                refund_rows.append(row)
            elif income_expense == "支出":
                expense_rows.append(row)
            elif income_expense == "收入":
                income_rows.append(row)

        return refund_rows, expense_rows, income_rows

    def _process_refunds(self, expense_rows: list, income_rows: list) -> tuple[list, list]:
        """Net refunds against expenses.

        Two refund sources are combined:
            1. Status refunds - the expense's status column says "已全额退款"
               or "已退款" with an amount.
            2. Transfer refunds - an income row from a counterparty that also
               appears among the expenses is treated as money sent back.

        ``income_rows`` is updated in place: rows recognised as transfer
        refunds are removed from it.

        Returns:
            ``(final_expense_rows, income_rows)`` where ``final_expense_rows``
            is a list of ``(row, remark_or_None)`` tuples.
        """
        # 3.1 Identify transfer refunds. A set gives one O(1) lookup per income
        # row instead of rescanning every expense.
        expense_merchants = {exp[2].strip() for exp in expense_rows}
        transfer_refunds = {}
        remaining_income = []

        for row in income_rows:
            merchant = row[2].strip()
            if merchant in expense_merchants:
                transfer_refunds[merchant] = (
                    transfer_refunds.get(merchant, Decimal("0")) + parse_amount(row[5])
                )
            else:
                remaining_income.append(row)

        # Slice assignment keeps the caller's list object, as list.remove did.
        income_rows[:] = remaining_income

        if transfer_refunds:
            print(f" 识别到转账退款: {len(transfer_refunds)} 笔")

        # 3.2 Walk the expenses and apply both refund sources.
        final_expense_rows = []

        for row in expense_rows:
            status = row[7]
            merchant = row[2].strip()
            original_amount = parse_amount(row[5])

            status_refund = Decimal("0")
            transfer_refund = transfer_refunds.get(merchant, Decimal("0"))

            if "已全额退款" in status:
                self.stats["fully_refunded"] += 1
                print(f" 全额退款删除: {row[0]} | {row[2]} | {row[3][:25]}... | {row[5]}")
                continue
            elif "已退款" in status:
                status_refund = self._extract_refund_amount(status) or Decimal("0")

            total_refund = status_refund + transfer_refund

            if total_refund <= 0:
                final_expense_rows.append((row, None))
                continue

            if total_refund >= original_amount:
                self.stats["fully_refunded"] += 1
                print(f" 全额退款删除: {row[0]} | {row[2]} | {row[3][:25]}... | {row[5]}")
            else:
                remaining = original_amount - total_refund
                new_row = row.copy()
                new_row[5] = f"¥{format_amount(remaining)}"
                remark = f"原金额{row[5]},退款¥{total_refund}"

                final_expense_rows.append((new_row, remark))
                self.stats["partially_refunded"] += 1
                print(f" 部分退款: {row[0]} | {row[2]} | 原{row[5]} -> ¥{format_amount(remaining)}")

            # The transfer total is consumed by the first refunded expense of
            # this merchant; pop() is a no-op when there was none.
            transfer_refunds.pop(merchant, None)

        return final_expense_rows, income_rows

    def _extract_refund_amount(self, status: str) -> Decimal | None:
        """Extract the refunded amount from a status string.

        Returns:
            The amount following "已退款" when present; ``None`` when the
            status says "已全额退款" (full refund, no amount);
            ``Decimal("0")`` otherwise.
        """
        match = self._REFUND_AMOUNT_RE.search(status)
        if match:
            return Decimal(match.group(1))
        if "已全额退款" in status:
            return None
        return Decimal("0")

    def _convert_and_reclassify(self, row_tuple: tuple, remark_override: str | None) -> list:
        """Convert one raw WeChat row to the aligned layout and categorise it.

        Args:
            row_tuple: Either a ``(row, remark)`` tuple or a bare row list.
            remark_override: Remark that takes precedence over the tuple's
                remark when non-empty.

        Returns:
            A 13-cell list in ``ALIGNED_HEADER`` order. The counterparty
            account cell is always "/" and the last cell is the review level:
            "" when ``infer_category`` reports a certain result, otherwise
            "HIGH".
        """
        if isinstance(row_tuple, tuple):
            row, remark = row_tuple
        else:
            row, remark = row_tuple, None

        remark = remark_override if remark_override else remark

        transaction_time = row[0]
        merchant = row[2]
        product = row[3]
        income_expense = row[4]
        amount = parse_amount(row[5])
        payment_method = row[6]
        status = row[7]
        order_no = row[8]
        merchant_order_no = row[9] if len(row) > 9 else ""
        # A generated refund remark wins over the row's own remark column.
        final_remark = remark if remark else (row[10] if len(row) > 10 else "/")

        # The raw transaction type (column 1) is not carried over; the
        # category is inferred from counterparty, product and direction.
        category, is_certain = infer_category(merchant, product, income_expense)

        review_mark = "" if is_certain else "HIGH"

        return [
            transaction_time,
            category,
            merchant,
            "/",  # counterparty account: no such column in the raw layout
            product,
            income_expense,
            format_amount(amount),
            payment_method,
            status,
            order_no,
            merchant_order_no,
            final_remark,
            review_mark,
        ]

    def reclassify(self, rows: list, header: list | None = None) -> list:
        """Return ``rows`` unchanged.

        Categorisation already happens in ``_convert_and_reclassify``; this
        method only satisfies the base-class interface. ``header`` is accepted
        and ignored so the method can be called the same way as the Alipay
        implementation.
        """
        return rows

    def _print_expense_summary(self, expense_rows: list):
        """Print the total and per-category expense amounts.

        Args:
            expense_rows: Rows in aligned layout; only rows whose
                income/expense cell (index 5) equals "支出" are counted.
        """
        total = Decimal("0")
        categories = {}

        for row in expense_rows:
            if row[5] == "支出":
                amt = Decimal(row[6])
                total += amt
                cat = row[1]
                categories[cat] = categories.get(cat, Decimal("0")) + amt

        print(f"清理后支出总额: ¥{total}")
        print("\n=== 按分类统计 ===")
        # Largest category first.
        for cat, amt in sorted(categories.items(), key=lambda x: -x[1]):
            print(f" {cat}: ¥{amt}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the WeChat cleaner.

    Parses the shared command-line options, builds a ``WechatCleaner``,
    applies the computed date range and runs the cleaning pipeline.
    """
    parser = create_arg_parser("清理微信支付账单数据")
    args = parser.parse_args()

    from .base import compute_date_range

    # Forward --format: it was parsed but never passed on, so the cleaner
    # always used its "csv" default and JSON output could not be selected.
    cleaner = WechatCleaner(args.input_file, args.output_file, output_format=args.format)
    start_date, end_date = compute_date_range(args)
    cleaner.set_date_range(start_date, end_date)
    cleaner.clean()


if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user