diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5a2684c..8652a4d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,41 @@
 格式基于 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.0.0/),
 版本号遵循 [语义化版本](https://semver.org/lang/zh-CN/)。
 
+## [1.3.0] - 2026-01-26
+
+### 新增
+- **京东账单支持** - 支持京东白条账单上传和清洗
+  - 自动识别京东账单类型(交易流水 ZIP)
+  - 解析京东白条账单 CSV 格式(含还款日期信息)
+  - 京东专属分类映射配置(`config/category_jd.yaml`)
+  - 支持京东外卖、京东平台商户等商户识别
+  - 上传页面和账单列表页面添加"京东"选项
+  - 账单来源 Badge 添加紫色京东标识
+
+### 优化
+- **京东订单智能去重** - 上传京东账单时自动软删除其他来源中的京东订单
+  - 识别描述中包含"京东-订单编号"的支付宝/微信账单
+  - 软删除冲突记录,避免重复计入支出
+  - 上传响应返回被删除的记录数
+- **分类推断复核等级优化** - 京东账单引入 LOW 复核等级
+  - 商户映射成功(如"京东外卖"):无需复核
+  - 原分类映射成功(如"食品酒饮"→餐饮美食):无需复核
+  - 通用关键词匹配成功:**LOW 复核**(需确认推断准确性)
+  - 未知分类或匹配失败:HIGH 复核
+- **京东平台商户关键词扩展** - 在通用分类配置中添加京东平台常见关键词
+  - 宠物用品:小佩、米家宠物、猫砂、猫粮等
+  - 数码电器:小米、延长保修、家电等
+
+### 技术改进
+- 新增 `analyzer/cleaners/jd.py` 京东账单清理器
+- 新增 `analyzer/config/category_jd.yaml` 京东专属配置
+- 后端新增 `SoftDeleteJDRelatedBills()` 接口和实现
+- 前端 API 类型添加 `'jd'` 支持
+- 新增单元测试 `analyzer/test_jd_cleaner.py`(11 个测试用例)
+
+### 文档
+- 更新 `TODO.md` 添加 Gitea Webhook 自动部署计划
+
 ## [1.2.0] - 2026-01-25
 
 ### 新增
diff --git a/analyzer/analyze_jd_bills.py b/analyzer/analyze_jd_bills.py
new file mode 100644
index 0000000..e4d7d54
--- /dev/null
+++ b/analyzer/analyze_jd_bills.py
@@ -0,0 +1,40 @@
+"""Analyze JD (Jingdong) bill data read from ../jd_bills.json."""
+import json
+import sys
+
+sys.stdout.reconfigure(encoding='utf-8')
+
+with open('../jd_bills.json', 'r', encoding='utf-8') as f:
+    d = json.load(f)
+
+bills = [b for b in d['data']['bills'] if b['bill_type'] == 'jd']
+print(f'Total JD bills: {len(bills)}')
+print()
+
+# Review level distribution
+review_levels = {}
+for b in bills:
+    lvl = b['review_level'] or 'NONE'
+    review_levels[lvl] = review_levels.get(lvl, 0) + 1
+print('Review level distribution:')
+for lvl, cnt in sorted(review_levels.items()):
+    print(f' {lvl}: {cnt}')
+print()
+
+# Category distribution
+categories = {}
+for b in bills:
+    cat = b['category']
+    categories[cat] = categories.get(cat, 0) + 1
+print('Category distribution:')
+for cat, cnt in sorted(categories.items(), key=lambda x: -x[1]):
+    print(f' {cat}: {cnt}')
+print()
+
+# Show bills that need review
+print('Bills needing review:')
+print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description")
+print('-' * 70)
+for b in bills:
+    if b['review_level']:
+        print(f"{b['review_level']:<5} | {b['category']:<12} | {b['merchant'][:20]:<20} | {b['description'][:30]}")
diff --git a/analyzer/test_jd_cleaner.py b/analyzer/test_jd_cleaner.py
new file mode 100644
index 0000000..e0de792
--- /dev/null
+++ b/analyzer/test_jd_cleaner.py
@@ -0,0 +1,133 @@
+"""
+Tests for the JD (Jingdong) bill cleaner.
+"""
+import zipfile
+import tempfile
+import os
+import csv
+import sys
+
+# Make sure output is written as UTF-8
+sys.stdout.reconfigure(encoding='utf-8')
+
+# Sample export committed under mock_data/ at the repository root. Resolved
+# relative to this file so the test does not depend on one developer's
+# absolute Windows path.
+MOCK_ZIP_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    '..',
+    'mock_data',
+    '京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip',
+)
+
+
+def test_jd_cleaner():
+    """Clean the sample JD export and print the resulting review levels."""
+    with zipfile.ZipFile(MOCK_ZIP_PATH, 'r') as zf:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            zf.extractall(tmpdir, pwd=b'683263')
+
+            # Find CSV files; fail instead of passing vacuously when none exist
+            csv_names = [f for f in os.listdir(tmpdir) if f.endswith('.csv')]
+            assert csv_names, f"No CSV file found in {MOCK_ZIP_PATH}"
+
+            for f in csv_names:
+                input_file = os.path.join(tmpdir, f)
+                output_file = os.path.join(tmpdir, 'output.csv')
+
+                print(f"Input file: {f}")
+                print("-" * 60)
+
+                # Run cleaner
+                from cleaners.jd import JDCleaner
+                cleaner = JDCleaner(input_file, output_file)
+                cleaner.clean()
+
+                # Read output and show review levels
+                print("\n" + "=" * 60)
+                print("OUTPUT REVIEW LEVELS")
+                print("=" * 60)
+
+                with open(output_file, 'r', encoding='utf-8') as of:
+                    reader = csv.reader(of)
+                    header = next(reader)
+                    review_idx = header.index('复核等级') if '复核等级' in header else -1
+                    cat_idx = header.index('交易分类') if '交易分类' in header else -1
+                    merchant_idx = header.index('交易对方') if '交易对方' in header else -1
+                    desc_idx = header.index('商品说明') if '商品说明' in header else -1
+
+                    stats = {'': 0, 'LOW': 0, 'HIGH': 0}
+                    rows_needing_review = []
+
+                    for row in reader:
+                        review = row[review_idx] if review_idx >= 0 else ''
+                        stats[review] = stats.get(review, 0) + 1
+                        if review:  # Collect rows that need review
+                            cat = row[cat_idx] if cat_idx >= 0 else ''
+                            merchant = row[merchant_idx][:20] if merchant_idx >= 0 else ''
+                            desc = row[desc_idx][:25] if desc_idx >= 0 else ''
+                            rows_needing_review.append((review, cat, merchant, desc))
+
+                # Print rows needing review
+                print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description")
+                print("-" * 70)
+                for review, cat, merchant, desc in rows_needing_review:
+                    print(f"{review:<5} | {cat:<12} | {merchant:<20} | {desc}")
+
+                print("\n" + "=" * 60)
+                print("STATISTICS")
+                print("=" * 60)
+                print(f"No review (confident): {stats['']}")
+                print(f"LOW (keyword match): {stats['LOW']}")
+                print(f"HIGH (needs manual): {stats['HIGH']}")
+                print(f"Total: {sum(stats.values())}")
+
+
+def test_infer_jd_category():
+    """Check the review level infer_jd_category returns for each table case."""
+    from cleaners.jd import infer_jd_category
+
+    print("\n" + "=" * 60)
+    print("INFER_JD_CATEGORY TESTS")
+    print("=" * 60)
+
+    tests = [
+        # (merchant, product, original category, expected level, note)
+        ('京东外卖', '火鸡面', '', 0, '商户映射'),
+        ('京东平台商户', 'xxx', '食品酒饮', 0, '原分类映射'),
+        ('京东平台商户', 'xxx', '数码电器', 0, '原分类映射'),
+        ('京东平台商户', 'xxx', '日用百货', 0, '原分类映射'),
+        ('京东平台商户', 'xxx', '图书文娱', 0, '原分类映射'),
+        ('京东平台商户', '猫粮', '其他', 1, '空映射+关键词成功'),
+        ('京东平台商户', '咖啡', '其他网购', 1, '空映射+关键词成功'),
+        ('京东平台商户', 'xxx', '其他', 2, '空映射+关键词失败'),
+        ('京东平台商户', 'xxx', '家居用品', 2, '未知分类'),
+        ('京东平台商户', 'xxx', '母婴', 2, '未知分类'),
+        ('京东平台商户', 'xxx', '', 2, '无原分类+关键词失败'),
+    ]
+
+    level_map = {0: 'NONE', 1: 'LOW', 2: 'HIGH'}
+
+    print(f"{'Merchant':<15} | {'Product':<8} | {'OrigCat':<10} | {'Result':<12} | {'Level':<5} | {'Expected':<5} | Note")
+    print("-" * 90)
+
+    failures = []
+    for merchant, product, orig_cat, expected_level, note in tests:
+        cat, _certain, level = infer_jd_category(merchant, product, orig_cat)
+        passed = level == expected_level
+        status = "✓" if passed else "✗"
+        if not passed:
+            failures.append(note)
+        # Fall back to the raw value so an unexpected level is reported, not a KeyError
+        level_name = level_map.get(level, str(level))
+        print(f"{merchant:<15} | {product:<8} | {orig_cat or '(empty)':<10} | {cat:<12} | {level_name:<5} | {level_map[expected_level]:<5} | {note} {status}")
+
+    print("\n" + ("All tests passed!" if not failures else "Some tests FAILED!"))
+    # Printing alone never fails under pytest; assert so a mismatch is reported
+    assert not failures, f"Unexpected review level for: {failures}"
+
+
+if __name__ == '__main__':
+    test_infer_jd_category()
+    print("\n")
+    test_jd_cleaner()
diff --git a/mock_data/京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip b/mock_data/京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip
new file mode 100644
index 0000000..7bae678
Binary files /dev/null and b/mock_data/京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip differ
diff --git a/web/package.json b/web/package.json
index 73b2b68..7564ac0 100644
--- a/web/package.json
+++ b/web/package.json
@@ -1,7 +1,7 @@
 {
   "name": "web",
   "private": true,
-  "version": "1.2.1",
+  "version": "1.3.0",
   "type": "module",
   "scripts": {
     "dev": "vite dev",