Files
billai/analyzer/test_jd_cleaner.py
cheliangzhao a2de8c5078 feat: implement WeChat cross-batch refund reconciliation and fix misc issues
WeChat cross-batch refund reconciliation:
- Add OriginalAmount field to CleanedBill for accurate cumulative refund math
- DeduplicateRawFile detects WeChat status-update rows (已退款/已全额退款) and
  emits WechatRefundUpdates for Go-side reconciliation (Scenario 1)
- WechatPy cleaner surfaces -退款 income rows with no same-batch expense match
  as unresolved_refunds for Go ReconcileRefund (Scenario 2)
- Add ReconcileWechatRefund to repository interface and MongoDB implementation
- upload.go step 15 iterates WechatRefundUpdates and reconciles against bills_cleaned

Bug fixes:
- ReviewStats: add nil repo check to prevent panic when DB is not connected
- JWT: remove hardcoded fallback secret; return 500/401 if JWTSecret not configured
- Remove unused parsePageParam dead code and its strconv import
- BillDetailDrawer: show 不计收支 amount in muted gray instead of red
- test_jd_cleaner.py: replace hardcoded D:\Projects\BillAI path with dynamic __file__ resolution

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-16 21:38:25 +08:00

118 lines
5.4 KiB
Python

"""
测试京东账单清洗器
"""
import zipfile
import tempfile
import os
import csv
import sys
# Make sure console output is encoded as UTF-8; the reports printed by this
# script contain Chinese text.
# The call is skipped when stdout is missing or has been replaced by a stream
# that has no `reconfigure` method (for example an `io.StringIO`), because an
# unconditional call would make importing this module fail.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')
def _report_review_levels(output_file):
    """Print the rows flagged for review in ``output_file`` plus per-level counts.

    ``output_file`` is read as a UTF-8 CSV whose first row is a header.
    Columns are located by header name; a column that is absent from the
    header yields an empty value for every row.

    Raises:
        StopIteration: if the file has no header row at all.
    """
    print("\n" + "=" * 60)
    print("OUTPUT REVIEW LEVELS")
    print("=" * 60)

    # newline='' lets the csv module handle line endings (and newlines
    # embedded in quoted fields) itself, as the csv documentation requires.
    with open(output_file, 'r', encoding='utf-8', newline='') as of:
        reader = csv.reader(of)
        header = next(reader)

        def column(name):
            # -1 marks "column not present in this file".
            return header.index(name) if name in header else -1

        def cell(row, idx):
            # Tolerate absent columns and rows shorter than the header.
            return row[idx] if 0 <= idx < len(row) else ''

        review_idx = column('复核等级')
        cat_idx = column('交易分类')
        merchant_idx = column('交易对方')
        desc_idx = column('商品说明')

        stats = {'': 0, 'LOW': 0, 'HIGH': 0}
        rows_needing_review = []
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they are not records.
                continue
            review = cell(row, review_idx)
            stats[review] = stats.get(review, 0) + 1
            if review:  # a non-empty level means the row needs review
                rows_needing_review.append((
                    review,
                    cell(row, cat_idx),
                    cell(row, merchant_idx)[:20],
                    cell(row, desc_idx)[:25],
                ))

    # Print rows needing review
    print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description")
    print("-" * 70)
    for review, cat, merchant, desc in rows_needing_review:
        print(f"{review:<5} | {cat:<12} | {merchant:<20} | {desc}")

    print("\n" + "=" * 60)
    print("STATISTICS")
    print("=" * 60)
    print(f"No review (confident): {stats['']}")
    print(f"LOW (keyword match): {stats['LOW']}")
    print(f"HIGH (needs manual): {stats['HIGH']}")
    print(f"Total: {sum(stats.values())}")


def test_jd_cleaner():
    """Run ``JDCleaner`` on the bundled mock JD export and print a review summary.

    The password-protected archive under ``mock_data`` (resolved relative to
    the parent of this file's directory) is extracted into a temporary
    directory. Every extracted ``.csv`` file is cleaned into ``output.csv`` in
    that same directory, and the review levels of the result are reported.

    Raises:
        AssertionError: if the extracted archive contains no ``.csv`` file,
            so the test cannot pass without having checked anything.
    """
    # Function-local import: `cleaners` is only needed when this test runs.
    from cleaners.jd import JDCleaner

    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    zip_path = os.path.join(base_dir, 'mock_data', '京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip')

    with zipfile.ZipFile(zip_path, 'r') as zf, tempfile.TemporaryDirectory() as tmpdir:
        zf.extractall(tmpdir, pwd=b'683263')

        # Find CSV files. The listing is taken before output.csv is written,
        # so the cleaner's own output is never picked up as an input.
        csv_names = [name for name in os.listdir(tmpdir) if name.endswith('.csv')]
        assert csv_names, f"no .csv file found in {zip_path}"

        output_file = os.path.join(tmpdir, 'output.csv')
        for name in csv_names:
            input_file = os.path.join(tmpdir, name)
            print(f"Input file: {name}")
            print("-" * 60)

            # Run cleaner
            cleaner = JDCleaner(input_file, output_file)
            cleaner.clean()

            # Read output and show review levels
            _report_review_levels(output_file)
def test_infer_jd_category():
    """Check the review level returned by ``infer_jd_category`` for known inputs.

    Each case is ``(merchant, product, original category, expected level,
    note)``. ``infer_jd_category`` is called with the first three values and
    its result is unpacked as ``(category, certain, level)``; only ``level``
    is compared against the expectation. One table row is printed per case,
    followed by a one-line summary.

    Raises:
        AssertionError: if any case returns a level other than the expected
            one, so that a test runner reports the failure instead of only
            printing it.
    """
    from cleaners.jd import infer_jd_category

    print("\n" + "=" * 60)
    print("INFER_JD_CATEGORY TESTS")
    print("=" * 60)

    tests = [
        # (merchant, product, original category, expected level, note)
        ('京东外卖', '火鸡面', '', 0, '商户映射'),
        ('京东平台商户', 'xxx', '食品酒饮', 0, '原分类映射'),
        ('京东平台商户', 'xxx', '数码电器', 0, '原分类映射'),
        ('京东平台商户', 'xxx', '日用百货', 0, '原分类映射'),
        ('京东平台商户', 'xxx', '图书文娱', 0, '原分类映射'),
        ('京东平台商户', '猫粮', '其他', 1, '空映射+关键词成功'),
        ('京东平台商户', '咖啡', '其他网购', 1, '空映射+关键词成功'),
        ('京东平台商户', 'xxx', '其他', 2, '空映射+关键词失败'),
        ('京东平台商户', 'xxx', '家居用品', 2, '未知分类'),
        ('京东平台商户', 'xxx', '母婴', 2, '未知分类'),
        ('京东平台商户', 'xxx', '', 2, '无原分类+关键词失败'),
    ]

    level_map = {0: 'NONE', 1: 'LOW', 2: 'HIGH'}

    print(f"{'Merchant':<15} | {'Product':<8} | {'OrigCat':<10} | {'Result':<12} | {'Level':<5} | {'Expected':<5} | Note")
    print("-" * 90)

    failures = []
    for merchant, product, orig_cat, expected_level, note in tests:
        cat, _certain, level = infer_jd_category(merchant, product, orig_cat)
        passed = level == expected_level
        # Visible per-row marker; the original used the same empty string for
        # both outcomes, so failing rows could not be told apart.
        status = "PASS" if passed else "FAIL"
        if not passed:
            failures.append((merchant, product, orig_cat, expected_level, level))
        # Fall back to the raw value so an unexpected level is reported as a
        # failed row instead of aborting the table with a KeyError.
        level_name = level_map.get(level, str(level))
        print(f"{merchant:<15} | {product:<8} | {orig_cat or '(empty)':<10} | {cat:<12} | {level_name:<5} | {level_map[expected_level]:<5} | {note} {status}")

    all_pass = not failures
    print("\n" + ("All tests passed!" if all_pass else "Some tests FAILED!"))
    assert all_pass, f"{len(failures)} case(s) returned an unexpected level: {failures}"
if __name__ == "__main__":
    # Script entry point: run the category-inference checks first, print a
    # separator, then run the end-to-end cleaner test on the mock archive.
    test_infer_jd_category()
    print("\n")
    test_jd_cleaner()