191 lines
6.7 KiB
Python
191 lines
6.7 KiB
Python
"""
|
||
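Clean Alipay transaction-detail data.

1. Keep only the rows of the given year (default: 2026).
2. For each refund entry, find the matching expense:
   - if the refund amount equals the expense amount, drop both rows;
   - if the refund amount is smaller than the expense amount, keep the
     difference and record a note in the remark column.

Usage:   python clean_alipay_data.py <input file> [output file] [--year YEAR]
Example: python clean_alipay_data.py 支付宝交易明细.csv output.csv --year 2026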
|
||
"""
|
||
import argparse
import csv
import os
from decimal import Decimal, InvalidOperation, ROUND_HALF_UP
|
||
|
||
|
||
def parse_args(argv=None):
    """Parse the command-line arguments of the cleaner.

    Args:
        argv: Optional list of argument strings to parse.  When left as
            ``None`` argparse reads the process command line, which is the
            original behaviour of this function.

    Returns:
        An ``argparse.Namespace`` with ``input_file``, ``output_file``
        (``None`` when omitted) and ``year`` (a string, ``"2026"`` by
        default).
    """
    parser = argparse.ArgumentParser(description="清理支付宝交易明细数据")
    parser.add_argument("input_file", help="输入的支付宝账单CSV文件")
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="输出文件(默认为 输入文件名_cleaned.csv)",
    )
    # The year is kept as text: it is used as a string prefix when rows are
    # filtered, not as a number.
    parser.add_argument("--year", type=str, default="2026", help="保留的年份(默认2026)")
    return parser.parse_args(argv)
|
||
|
||
|
||
def parse_amount(amount_str):
    """Parse an amount string into a ``Decimal``.

    Surrounding whitespace is ignored.  Input that cannot be read as a
    finite decimal number (empty or non-numeric text, a non-string value,
    NaN, infinity) yields ``Decimal("0")``.

    Args:
        amount_str: Text of the amount cell.

    Returns:
        The parsed ``Decimal``, or ``Decimal("0")`` as a fallback.
    """
    try:
        amount = Decimal(amount_str.strip())
    except (InvalidOperation, AttributeError, TypeError, ValueError):
        # Only conversion failures are swallowed; KeyboardInterrupt and
        # SystemExit must propagate, which a bare ``except`` prevented.
        return Decimal("0")
    # "NaN"/"Infinity" are valid Decimal literals but cannot be summed or
    # ordered meaningfully, so they are treated like unparseable input.
    if not amount.is_finite():
        return Decimal("0")
    return amount
|
||
|
||
|
||
def find_matching_expense(refund_row, expense_rows):
    """Locate the expense record that a refund row refers to.

    Column 9 holds the transaction order number and column 10 the merchant
    order number.  The text before the first "_" of the refund's order
    number is taken as the original order number.  An expense matches when
    its order number equals that original order number, or when its
    merchant order number equals the refund's; empty values never match.

    Args:
        refund_row: The refund record (a list of cell strings).
        expense_rows: Candidate expense records.

    Returns:
        ``(index, expense_row)`` for the first match, otherwise
        ``(None, None)``.  Rows with fewer than 11 columns are never
        matched.
    """
    if len(refund_row) < 11:
        return None, None

    # partition() returns the whole string when there is no "_".
    wanted_order = refund_row[9].strip().partition("_")[0]
    wanted_merchant = refund_row[10].strip()

    for idx, candidate in enumerate(expense_rows):
        if len(candidate) < 11:
            continue
        candidate_order = candidate[9].strip()
        candidate_merchant = candidate[10].strip()
        same_order = bool(wanted_order) and candidate_order == wanted_order
        same_merchant = bool(wanted_merchant) and candidate_merchant == wanted_merchant
        if same_order or same_merchant:
            return idx, candidate

    return None, None
|
||
|
||
|
||
def _refund_key(refund_row):
    """Return the key a refund is aggregated under ("" when it has none)."""
    # A refund's order number usually embeds the original order number
    # before the first "_"; partition() keeps the whole string otherwise.
    original_order = refund_row[9].strip().partition("_")[0]
    return original_order if original_order else refund_row[10].strip()


def _aggregate_refunds(refund_rows):
    """Sum refund amounts per key; one expense may have several refunds."""
    order_refunds = {}
    for refund_row in refund_rows:
        if len(refund_row) < 11:
            continue
        refund_amount = parse_amount(refund_row[6])
        key = _refund_key(refund_row)
        if key:
            order_refunds[key] = order_refunds.get(key, Decimal("0")) + refund_amount
            print(f" 退款记录: {refund_row[0]} | {refund_row[2]} | {refund_amount}元")
    return order_refunds


def _match_refund(order_no, merchant_no, order_refunds):
    """Return ``(key, total)`` of the first refund matching an expense.

    A refund key matches when the expense's order number starts with it
    (which includes equality) or when the merchant order number equals it.
    Returns ``(None, Decimal("0"))`` when nothing matches.
    """
    for key, amount in order_refunds.items():
        if key and (order_no.startswith(key) or merchant_no == key):
            return key, amount
    return None, Decimal("0")


def _apply_refunds(expense_rows, order_refunds):
    """Drop fully refunded expenses and reduce partially refunded ones.

    Returns ``(final_rows, fully_refunded, partially_refunded,
    unmatched_keys)`` where ``unmatched_keys`` lists the refund keys that
    matched no expense row.
    """
    final_rows = []
    fully_refunded = 0
    partially_refunded = 0
    matched_keys = set()

    for row in expense_rows:
        # Rows without both order-number columns cannot be matched at all.
        if len(row) < 11:
            final_rows.append(row)
            continue

        expense_amount = parse_amount(row[6])
        matched_key, refund_amount = _match_refund(
            row[9].strip(), row[10].strip(), order_refunds
        )
        if matched_key is None:
            # No refund for this row: keep it untouched.
            final_rows.append(row)
            continue
        matched_keys.add(matched_key)

        if refund_amount >= expense_amount:
            # Fully refunded: the row is dropped from the output.
            fully_refunded += 1
            print(f" 全额退款删除: {row[0]} | {row[2]} | {row[4][:25]}... | 原{expense_amount}元")
            continue

        # Partially refunded: keep the difference and explain it in the
        # remark column.
        remaining = expense_amount - refund_amount
        remaining_str = str(remaining.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP))

        new_row = row.copy()
        if len(new_row) < 12:
            # The row has no remark column yet; add an empty one so the
            # note below has somewhere to go.
            new_row.append("")
        new_row[6] = remaining_str
        original_remark = new_row[11]
        new_row[11] = f"原金额{expense_amount}元,退款{refund_amount}元{';' + original_remark if original_remark else ''}"

        final_rows.append(new_row)
        partially_refunded += 1
        print(f" 部分退款: {row[0]} | {row[2]} | 原{expense_amount}元 -> {remaining_str}元")

    unmatched_keys = [key for key in order_refunds if key not in matched_keys]
    return final_rows, fully_refunded, partially_refunded, unmatched_keys


def main():
    """Clean an Alipay transaction CSV according to the command line.

    Steps:
      1. Read the input CSV (first row is the header).
      2. Keep only rows whose first column starts with the requested year.
      3. Separate refund rows (second column equals "退款") from the rest.
      4. Sum the refunds per original order.
      5. Drop fully refunded rows, reduce partially refunded ones and note
         the adjustment in the remark column (index 11).
      6. Write the header and the remaining rows to the output file.

    Progress and a summary are printed to standard output.

    Raises:
        SystemExit: If the input file contains no header row.
    """
    args = parse_args()

    input_file = args.input_file
    output_file = args.output_file
    year = args.year

    # Derive the output name from the input name when none was given.
    if output_file is None:
        output_file = f"{os.path.splitext(input_file)[0]}_cleaned.csv"

    print(f"输入文件: {input_file}")
    print(f"输出文件: {output_file}")
    print(f"保留年份: {year}")
    print()

    # newline="" lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(input_file, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            # An empty file has no header; stop with a message instead of
            # an unhandled StopIteration.
            raise SystemExit(f"输入文件为空: {input_file}")
        rows = list(reader)

    print(f"原始数据行数: {len(rows)}")

    # Step 1: keep only the requested year.
    rows_year = [row for row in rows if row and row[0].startswith(year)]
    print(f"{year}年数据行数: {len(rows_year)}")

    # Step 2: separate refund rows from everything else.
    refund_rows = []
    expense_rows = []
    for row in rows_year:
        if len(row) > 1 and row[1] == "退款":
            refund_rows.append(row)
        else:
            expense_rows.append(row)

    print(f"退款条目数: {len(refund_rows)}")
    print(f"非退款条目数: {len(expense_rows)}")

    # Step 3: total refund amount per original order.
    order_refunds = _aggregate_refunds(refund_rows)
    print(f"有退款的订单数: {len(order_refunds)}")

    # Step 4: apply the refunds to the remaining rows.
    final_rows, fully_refunded, partially_refunded, unmatched_keys = _apply_refunds(
        expense_rows, order_refunds
    )

    print("\n处理结果:")
    print(f" 全额退款删除: {fully_refunded} 条")
    print(f" 部分退款调整: {partially_refunded} 条")
    print(f" 最终保留行数: {len(final_rows)}")
    if unmatched_keys:
        # These refunds found no expense row (for example the purchase is
        # outside the selected year); report them rather than dropping them
        # without a trace.
        print(f" 未匹配到支出的退款订单: {len(unmatched_keys)} 个")

    # Write the cleaned data.
    with open(output_file, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(final_rows)

    print(f"\n清理后的数据已保存到: {output_file}")
|
||
|
||
|
||
# Run the cleaner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||
|