""" 测试京东账单清洗器 """ import zipfile import tempfile import os import csv import sys # 确保输出使用 UTF-8 sys.stdout.reconfigure(encoding='utf-8') def test_jd_cleaner(): zip_path = r'D:\Projects\BillAI\mock_data\京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip' with zipfile.ZipFile(zip_path, 'r') as zf: with tempfile.TemporaryDirectory() as tmpdir: zf.extractall(tmpdir, pwd=b'683263') # Find CSV file for f in os.listdir(tmpdir): if f.endswith('.csv'): input_file = os.path.join(tmpdir, f) output_file = os.path.join(tmpdir, 'output.csv') print(f"Input file: {f}") print("-" * 60) # Run cleaner from cleaners.jd import JDCleaner cleaner = JDCleaner(input_file, output_file) cleaner.clean() # Read output and show review levels print("\n" + "=" * 60) print("OUTPUT REVIEW LEVELS") print("=" * 60) with open(output_file, 'r', encoding='utf-8') as of: reader = csv.reader(of) header = next(reader) review_idx = header.index('复核等级') if '复核等级' in header else -1 cat_idx = header.index('交易分类') if '交易分类' in header else -1 merchant_idx = header.index('交易对方') if '交易对方' in header else -1 desc_idx = header.index('商品说明') if '商品说明' in header else -1 stats = {'': 0, 'LOW': 0, 'HIGH': 0} rows_needing_review = [] for row in reader: review = row[review_idx] if review_idx >= 0 else '' stats[review] = stats.get(review, 0) + 1 if review: # Collect rows that need review cat = row[cat_idx] if cat_idx >= 0 else '' merchant = row[merchant_idx][:20] if merchant_idx >= 0 else '' desc = row[desc_idx][:25] if desc_idx >= 0 else '' rows_needing_review.append((review, cat, merchant, desc)) # Print rows needing review print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description") print("-" * 70) for review, cat, merchant, desc in rows_needing_review: print(f"{review:<5} | {cat:<12} | {merchant:<20} | {desc}") print("\n" + "=" * 60) print("STATISTICS") print("=" * 60) print(f"No review (confident): {stats['']}") print(f"LOW (keyword match): {stats['LOW']}") print(f"HIGH (needs manual): {stats['HIGH']}") print(f"Total: {sum(stats.values())}") def test_infer_jd_category(): """测试分类推断逻辑""" from cleaners.jd import infer_jd_category print("\n" + "=" * 60) print("INFER_JD_CATEGORY TESTS") print("=" * 60) tests = [ # (商户, 商品, 原分类, 预期等级, 说明) ('京东外卖', '火鸡面', '', 0, '商户映射'), ('京东平台商户', 'xxx', '食品酒饮', 0, '原分类映射'), ('京东平台商户', 'xxx', '数码电器', 0, '原分类映射'), ('京东平台商户', 'xxx', '日用百货', 0, '原分类映射'), ('京东平台商户', 'xxx', '图书文娱', 0, '原分类映射'), ('京东平台商户', '猫粮', '其他', 1, '空映射+关键词成功'), ('京东平台商户', '咖啡', '其他网购', 1, '空映射+关键词成功'), ('京东平台商户', 'xxx', '其他', 2, '空映射+关键词失败'), ('京东平台商户', 'xxx', '家居用品', 2, '未知分类'), ('京东平台商户', 'xxx', '母婴', 2, '未知分类'), ('京东平台商户', 'xxx', '', 2, '无原分类+关键词失败'), ] level_map = {0: 'NONE', 1: 'LOW', 2: 'HIGH'} print(f"{'Merchant':<15} | {'Product':<8} | {'OrigCat':<10} | {'Result':<12} | {'Level':<5} | {'Expected':<5} | Note") print("-" * 90) all_pass = True for merchant, product, orig_cat, expected_level, note in tests: cat, certain, level = infer_jd_category(merchant, product, orig_cat) status = "✓" if level == expected_level else "✗" if level != expected_level: all_pass = False print(f"{merchant:<15} | {product:<8} | {orig_cat or '(empty)':<10} | {cat:<12} | {level_map[level]:<5} | {level_map[expected_level]:<5} | {note} {status}") print("\n" + ("All tests passed!" if all_pass else "Some tests FAILED!")) if __name__ == '__main__': test_infer_jd_category() print("\n") test_jd_cleaner()