chore: release v1.3.0 - 京东账单支持

2026-01-26 15:36:05 +08:00
parent b654265d96
commit f537b53ebd
5 changed files with 192 additions and 1 deletions
@@ -0,0 +1,40 @@
+"""分析京东账单数据"""
+import json
+import sys
+
+sys.stdout.reconfigure(encoding='utf-8')
+
+with open('../jd_bills.json', 'r', encoding='utf-8') as f:
+    d = json.load(f)
+
+bills = [b for b in d['data']['bills'] if b['bill_type'] == 'jd']
+print(f'Total JD bills: {len(bills)}')
+print()
+
+# Review level distribution
+review_levels = {}
+for b in bills:
+    lvl = b['review_level'] or 'NONE'
+    review_levels[lvl] = review_levels.get(lvl, 0) + 1
+print('Review level distribution:')
+for lvl, cnt in sorted(review_levels.items()):
+    print(f'  {lvl}: {cnt}')
+print()
+
+# Category distribution
+categories = {}
+for b in bills:
+    cat = b['category']
+    categories[cat] = categories.get(cat, 0) + 1
+print('Category distribution:')
+for cat, cnt in sorted(categories.items(), key=lambda x: -x[1]):
+    print(f'  {cat}: {cnt}')
+print()
+
+# Show bills that need review
+print('Bills needing review:')
+print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description")
+print('-' * 70)
+for b in bills:
+    if b['review_level']:
+        print(f"{b['review_level']:<5} | {b['category']:<12} | {b['merchant'][:20]:<20} | {b['description'][:30]}")
@@ -0,0 +1,116 @@
+"""
+Tests for the JD bill cleaner.
+"""
+import zipfile
+import tempfile
+import os
+import csv
+import sys
+
+# Ensure output is written as UTF-8
+sys.stdout.reconfigure(encoding='utf-8')
+
+def test_jd_cleaner():
+    zip_path = r'D:\Projects\BillAI\mock_data\京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip'
+    
+    with zipfile.ZipFile(zip_path, 'r') as zf:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            zf.extractall(tmpdir, pwd=b'683263')
+            
+            # Find CSV file
+            for f in os.listdir(tmpdir):
+                if f.endswith('.csv'):
+                    input_file = os.path.join(tmpdir, f)
+                    output_file = os.path.join(tmpdir, 'output.csv')
+                    
+                    print(f"Input file: {f}")
+                    print("-" * 60)
+                    
+                    # Run cleaner
+                    from cleaners.jd import JDCleaner
+                    cleaner = JDCleaner(input_file, output_file)
+                    cleaner.clean()
+                    
+                    # Read output and show review levels
+                    print("\n" + "=" * 60)
+                    print("OUTPUT REVIEW LEVELS")
+                    print("=" * 60)
+                    
+                    with open(output_file, 'r', encoding='utf-8') as of:
+                        reader = csv.reader(of)
+                        header = next(reader)
+                        review_idx = header.index('复核等级') if '复核等级' in header else -1
+                        cat_idx = header.index('交易分类') if '交易分类' in header else -1
+                        merchant_idx = header.index('交易对方') if '交易对方' in header else -1
+                        desc_idx = header.index('商品说明') if '商品说明' in header else -1
+                        
+                        stats = {'': 0, 'LOW': 0, 'HIGH': 0}
+                        rows_needing_review = []
+                        
+                        for row in reader:
+                            review = row[review_idx] if review_idx >= 0 else ''
+                            stats[review] = stats.get(review, 0) + 1
+                            if review:  # Collect rows that need review
+                                cat = row[cat_idx] if cat_idx >= 0 else ''
+                                merchant = row[merchant_idx][:20] if merchant_idx >= 0 else ''
+                                desc = row[desc_idx][:25] if desc_idx >= 0 else ''
+                                rows_needing_review.append((review, cat, merchant, desc))
+                        
+                        # Print rows needing review
+                        print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description")
+                        print("-" * 70)
+                        for review, cat, merchant, desc in rows_needing_review:
+                            print(f"{review:<5} | {cat:<12} | {merchant:<20} | {desc}")
+                        
+                        print("\n" + "=" * 60)
+                        print("STATISTICS")
+                        print("=" * 60)
+                        print(f"No review (confident): {stats['']}")
+                        print(f"LOW (keyword match):   {stats['LOW']}")
+                        print(f"HIGH (needs manual):   {stats['HIGH']}")
+                        print(f"Total:                 {sum(stats.values())}")
+
+
+def test_infer_jd_category():
+    """测试分类推断逻辑"""
+    from cleaners.jd import infer_jd_category
+    
+    print("\n" + "=" * 60)
+    print("INFER_JD_CATEGORY TESTS")
+    print("=" * 60)
+    
+    tests = [
+        # (商户, 商品, 原分类, 预期等级, 说明)
+        ('京东外卖', '火鸡面', '', 0, '商户映射'),
+        ('京东平台商户', 'xxx', '食品酒饮', 0, '原分类映射'),
+        ('京东平台商户', 'xxx', '数码电器', 0, '原分类映射'),
+        ('京东平台商户', 'xxx', '日用百货', 0, '原分类映射'),
+        ('京东平台商户', 'xxx', '图书文娱', 0, '原分类映射'),
+        ('京东平台商户', '猫粮', '其他', 1, '空映射+关键词成功'),
+        ('京东平台商户', '咖啡', '其他网购', 1, '空映射+关键词成功'),
+        ('京东平台商户', 'xxx', '其他', 2, '空映射+关键词失败'),
+        ('京东平台商户', 'xxx', '家居用品', 2, '未知分类'),
+        ('京东平台商户', 'xxx', '母婴', 2, '未知分类'),
+        ('京东平台商户', 'xxx', '', 2, '无原分类+关键词失败'),
+    ]
+    
+    level_map = {0: 'NONE', 1: 'LOW', 2: 'HIGH'}
+    
+    print(f"{'Merchant':<15} | {'Product':<8} | {'OrigCat':<10} | {'Result':<12} | {'Level':<5} | {'Expected':<5} | Note")
+    print("-" * 90)
+    
+    all_pass = True
+    for merchant, product, orig_cat, expected_level, note in tests:
+        cat, certain, level = infer_jd_category(merchant, product, orig_cat)
+        status = "✓" if level == expected_level else "✗"
+        if level != expected_level:
+            all_pass = False
+        print(f"{merchant:<15} | {product:<8} | {orig_cat or '(empty)':<10} | {cat:<12} | {level_map[level]:<5} | {level_map[expected_level]:<5} | {note} {status}")
+    
+    print("\n" + ("All tests passed!" if all_pass else "Some tests FAILED!"))
+
+
+if __name__ == '__main__':
+    test_infer_jd_category()
+    print("\n")
+    test_jd_cleaner()