""" 清理支付宝交易明细数据 1. 仅保留指定年份的数据(默认2026年) 2. 对于退款的条目,找到对应的支出: - 如果退款金额=支出金额,两条都删除 - 如果退款金额<支出金额,保留差额并备注 用法: python clean_alipay_data.py <输入文件> [输出文件] [--year 年份] 示例: python clean_alipay_data.py 支付宝交易明细.csv output.csv --year 2026 """ import csv import argparse from decimal import Decimal, ROUND_HALF_UP def parse_args(): parser = argparse.ArgumentParser(description="清理支付宝交易明细数据") parser.add_argument("input_file", help="输入的支付宝账单CSV文件") parser.add_argument("output_file", nargs="?", default=None, help="输出文件(默认为 输入文件名_cleaned.csv)") parser.add_argument("--year", type=str, default="2026", help="保留的年份(默认2026)") return parser.parse_args() def parse_amount(amount_str): """解析金额字符串为Decimal""" try: return Decimal(amount_str.strip()) except: return Decimal("0") def find_matching_expense(refund_row, expense_rows): """ 找到退款对应的支出记录 返回 (索引, 支出记录) 或 (None, None) """ if len(refund_row) < 11: return None, None refund_order_no = refund_row[9].strip() # 交易订单号 refund_merchant_no = refund_row[10].strip() # 商家订单号 # 退款的交易订单号通常包含原订单号(用_分隔) original_order = refund_order_no.split("_")[0] if "_" in refund_order_no else refund_order_no for i, expense_row in enumerate(expense_rows): if len(expense_row) >= 11: expense_order_no = expense_row[9].strip() expense_merchant_no = expense_row[10].strip() # 匹配条件:订单号相同 或 商家订单号相同 if (original_order and expense_order_no == original_order) or \ (refund_merchant_no and expense_merchant_no == refund_merchant_no): return i, expense_row return None, None def main(): args = parse_args() input_file = args.input_file output_file = args.output_file year = args.year # 如果未指定输出文件,自动生成 if output_file is None: import os base_name = os.path.splitext(input_file)[0] output_file = f"{base_name}_cleaned.csv" print(f"输入文件: {input_file}") print(f"输出文件: {output_file}") print(f"保留年份: {year}") print() # 读取所有数据 with open(input_file, "r", encoding="utf-8") as f: reader = csv.reader(f) header = next(reader) rows = list(reader) print(f"原始数据行数: {len(rows)}") # 第一步:筛选指定年份的数据 rows_year = [] for row in rows: if row and row[0].startswith(year): rows_year.append(row) print(f"{year}年数据行数: {len(rows_year)}") # 第二步:分离退款和非退款条目 refund_rows = [] # 退款条目 expense_rows = [] # 非退款条目 for row in rows_year: if len(row) > 1 and row[1] == "退款": refund_rows.append(row) else: expense_rows.append(row) print(f"退款条目数: {len(refund_rows)}") print(f"非退款条目数: {len(expense_rows)}") # 第三步:处理退款,按订单号聚合退款金额 # 一个支出可能有多笔退款 order_refunds = {} # 订单号 -> 退款总额 for refund_row in refund_rows: if len(refund_row) >= 11: refund_order_no = refund_row[9].strip() refund_merchant_no = refund_row[10].strip() refund_amount = parse_amount(refund_row[6]) original_order = refund_order_no.split("_")[0] if "_" in refund_order_no else refund_order_no # 使用原订单号作为key key = original_order if original_order else refund_merchant_no if key: if key not in order_refunds: order_refunds[key] = Decimal("0") order_refunds[key] += refund_amount print(f" 退款记录: {refund_row[0]} | {refund_row[2]} | {refund_amount}元") print(f"有退款的订单数: {len(order_refunds)}") # 第四步:处理每笔支出 final_rows = [] fully_refunded = 0 partially_refunded = 0 for row in expense_rows: if len(row) >= 12: order_no = row[9].strip() merchant_no = row[10].strip() expense_amount = parse_amount(row[6]) # 查找对应的退款 refund_amount = Decimal("0") matched_key = None for key, amount in order_refunds.items(): if key and (order_no == key or merchant_no == key or order_no.startswith(key)): refund_amount = amount matched_key = key break if matched_key: if refund_amount >= expense_amount: # 全额退款,删除该条目 fully_refunded += 1 print(f" 全额退款删除: {row[0]} | {row[2]} | {row[4][:25]}... | 原{expense_amount}元") else: # 部分退款,保留差额并备注 remaining = expense_amount - refund_amount remaining_str = str(remaining.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)) new_row = row.copy() new_row[6] = remaining_str # 在备注列添加说明 original_remark = new_row[11] if len(new_row) > 11 else "" new_row[11] = f"原金额{expense_amount}元,退款{refund_amount}元{';' + original_remark if original_remark else ''}" final_rows.append(new_row) partially_refunded += 1 print(f" 部分退款: {row[0]} | {row[2]} | 原{expense_amount}元 -> {remaining_str}元") else: # 无退款,保留原记录 final_rows.append(row) else: final_rows.append(row) print(f"\n处理结果:") print(f" 全额退款删除: {fully_refunded} 条") print(f" 部分退款调整: {partially_refunded} 条") print(f" 最终保留行数: {len(final_rows)}") # 写入清理后的数据 with open(output_file, "w", encoding="utf-8", newline="") as f: writer = csv.writer(f) writer.writerow(header) writer.writerows(final_rows) print(f"\n清理后的数据已保存到: {output_file}") if __name__ == "__main__": main()