From f537b53ebdeced8eb2a72a25b3e6708d36bb7bfc Mon Sep 17 00:00:00 2001 From: CHE LIANG ZHAO Date: Mon, 26 Jan 2026 15:36:05 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20release=20v1.3.0=20-=20=E4=BA=AC?= =?UTF-8?q?=E4=B8=9C=E8=B4=A6=E5=8D=95=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 35 ++++++ analyzer/analyze_jd_bills.py | 40 ++++++ analyzer/test_jd_cleaner.py | 116 ++++++++++++++++++ ...间2026年01月26日13时29分47秒)(密码683263)_209.zip | Bin 0 -> 3807 bytes web/package.json | 2 +- 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 analyzer/analyze_jd_bills.py create mode 100644 analyzer/test_jd_cleaner.py create mode 100644 mock_data/京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a2684c..8652a4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,41 @@ 格式基于 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.0.0/), 版本号遵循 [语义化版本](https://semver.org/lang/zh-CN/)。 +## [1.3.0] - 2026-01-26 + +### 新增 +- **京东账单支持** - 支持京东白条账单上传和清洗 + - 自动识别京东账单类型(交易流水 ZIP) + - 解析京东白条账单 CSV 格式(含还款日期信息) + - 京东专属分类映射配置(`config/category_jd.yaml`) + - 支持京东外卖、京东平台商户等商户识别 + - 上传页面和账单列表页面添加"京东"选项 + - 账单来源 Badge 添加紫色京东标识 + +### 优化 +- **京东订单智能去重** - 上传京东账单时自动软删除其他来源中的京东订单 + - 识别描述中包含"京东-订单编号"的支付宝/微信账单 + - 软删除冲突记录,避免重复计入支出 + - 上传响应返回被删除的记录数 +- **分类推断复核等级优化** - 京东账单引入 LOW 复核等级 + - 商户映射成功(如"京东外卖"):无需复核 + - 原分类映射成功(如"食品酒饮"→餐饮美食):无需复核 + - 通用关键词匹配成功:**LOW 复核**(需确认推断准确性) + - 未知分类或匹配失败:HIGH 复核 +- **京东平台商户关键词扩展** - 在通用分类配置中添加京东平台常见关键词 + - 宠物用品:小佩、米家宠物、猫砂、猫粮等 + - 数码电器:小米、延长保修、家电等 + +### 技术改进 +- 新增 `analyzer/cleaners/jd.py` 京东账单清理器 +- 新增 `analyzer/config/category_jd.yaml` 京东专属配置 +- 后端新增 `SoftDeleteJDRelatedBills()` 接口和实现 +- 前端 API 类型添加 `'jd'` 支持 +- 新增单元测试 `analyzer/test_jd_cleaner.py`(11 个测试用例) + +### 文档 +- 更新 `TODO.md` 添加 Gitea Webhook 自动部署计划 + ## [1.2.0] - 2026-01-25 ### 新增 diff --git a/analyzer/analyze_jd_bills.py b/analyzer/analyze_jd_bills.py new file mode 100644 index 0000000..e4d7d54 --- /dev/null +++ b/analyzer/analyze_jd_bills.py @@ -0,0 +1,40 @@ +"""分析京东账单数据""" +import json +import sys + +sys.stdout.reconfigure(encoding='utf-8') + +with open('../jd_bills.json', 'r', encoding='utf-8') as f: + d = json.load(f) + +bills = [b for b in d['data']['bills'] if b['bill_type'] == 'jd'] +print(f'Total JD bills: {len(bills)}') +print() + +# Review level distribution +review_levels = {} +for b in bills: + lvl = b['review_level'] or 'NONE' + review_levels[lvl] = review_levels.get(lvl, 0) + 1 +print('Review level distribution:') +for lvl, cnt in sorted(review_levels.items()): + print(f' {lvl}: {cnt}') +print() + +# Category distribution +categories = {} +for b in bills: + cat = b['category'] + categories[cat] = categories.get(cat, 0) + 1 +print('Category distribution:') +for cat, cnt in sorted(categories.items(), key=lambda x: -x[1]): + print(f' {cat}: {cnt}') +print() + +# Show bills that need review +print('Bills needing review:') +print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description") +print('-' * 70) +for b in bills: + if b['review_level']: + print(f"{b['review_level']:<5} | {b['category']:<12} | {b['merchant'][:20]:<20} | {b['description'][:30]}") diff --git a/analyzer/test_jd_cleaner.py b/analyzer/test_jd_cleaner.py new file mode 100644 index 0000000..e0de792 --- /dev/null +++ b/analyzer/test_jd_cleaner.py @@ -0,0 +1,116 @@ +""" +测试京东账单清洗器 +""" +import zipfile +import tempfile +import os +import csv +import sys + +# 确保输出使用 UTF-8 +sys.stdout.reconfigure(encoding='utf-8') + +def test_jd_cleaner(): + zip_path = r'D:\Projects\BillAI\mock_data\京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip' + + with zipfile.ZipFile(zip_path, 'r') as zf: + with tempfile.TemporaryDirectory() as tmpdir: + zf.extractall(tmpdir, pwd=b'683263') + + # Find CSV file + for f in os.listdir(tmpdir): + if f.endswith('.csv'): + input_file = os.path.join(tmpdir, f) + output_file = os.path.join(tmpdir, 'output.csv') + + print(f"Input file: {f}") + print("-" * 60) + + # Run cleaner + from cleaners.jd import JDCleaner + cleaner = JDCleaner(input_file, output_file) + cleaner.clean() + + # Read output and show review levels + print("\n" + "=" * 60) + print("OUTPUT REVIEW LEVELS") + print("=" * 60) + + with open(output_file, 'r', encoding='utf-8') as of: + reader = csv.reader(of) + header = next(reader) + review_idx = header.index('复核等级') if '复核等级' in header else -1 + cat_idx = header.index('交易分类') if '交易分类' in header else -1 + merchant_idx = header.index('交易对方') if '交易对方' in header else -1 + desc_idx = header.index('商品说明') if '商品说明' in header else -1 + + stats = {'': 0, 'LOW': 0, 'HIGH': 0} + rows_needing_review = [] + + for row in reader: + review = row[review_idx] if review_idx >= 0 else '' + stats[review] = stats.get(review, 0) + 1 + if review: # Collect rows that need review + cat = row[cat_idx] if cat_idx >= 0 else '' + merchant = row[merchant_idx][:20] if merchant_idx >= 0 else '' + desc = row[desc_idx][:25] if desc_idx >= 0 else '' + rows_needing_review.append((review, cat, merchant, desc)) + + # Print rows needing review + print(f"{'Level':<5} | {'Category':<12} | {'Merchant':<20} | Description") + print("-" * 70) + for review, cat, merchant, desc in rows_needing_review: + print(f"{review:<5} | {cat:<12} | {merchant:<20} | {desc}") + + print("\n" + "=" * 60) + print("STATISTICS") + print("=" * 60) + print(f"No review (confident): {stats['']}") + print(f"LOW (keyword match): {stats['LOW']}") + print(f"HIGH (needs manual): {stats['HIGH']}") + print(f"Total: {sum(stats.values())}") + + +def test_infer_jd_category(): + """测试分类推断逻辑""" + from cleaners.jd import infer_jd_category + + print("\n" + "=" * 60) + print("INFER_JD_CATEGORY TESTS") + print("=" * 60) + + tests = [ + # (商户, 商品, 原分类, 预期等级, 说明) + ('京东外卖', '火鸡面', '', 0, '商户映射'), + ('京东平台商户', 'xxx', '食品酒饮', 0, '原分类映射'), + ('京东平台商户', 'xxx', '数码电器', 0, '原分类映射'), + ('京东平台商户', 'xxx', '日用百货', 0, '原分类映射'), + ('京东平台商户', 'xxx', '图书文娱', 0, '原分类映射'), + ('京东平台商户', '猫粮', '其他', 1, '空映射+关键词成功'), + ('京东平台商户', '咖啡', '其他网购', 1, '空映射+关键词成功'), + ('京东平台商户', 'xxx', '其他', 2, '空映射+关键词失败'), + ('京东平台商户', 'xxx', '家居用品', 2, '未知分类'), + ('京东平台商户', 'xxx', '母婴', 2, '未知分类'), + ('京东平台商户', 'xxx', '', 2, '无原分类+关键词失败'), + ] + + level_map = {0: 'NONE', 1: 'LOW', 2: 'HIGH'} + + print(f"{'Merchant':<15} | {'Product':<8} | {'OrigCat':<10} | {'Result':<12} | {'Level':<5} | {'Expected':<5} | Note") + print("-" * 90) + + all_pass = True + for merchant, product, orig_cat, expected_level, note in tests: + cat, certain, level = infer_jd_category(merchant, product, orig_cat) + status = "✓" if level == expected_level else "✗" + if level != expected_level: + all_pass = False + print(f"{merchant:<15} | {product:<8} | {orig_cat or '(empty)':<10} | {cat:<12} | {level_map[level]:<5} | {level_map[expected_level]:<5} | {note} {status}") + + print("\n" + ("All tests passed!" if all_pass else "Some tests FAILED!")) + + +if __name__ == '__main__': + test_infer_jd_category() + print("\n") + test_jd_cleaner() diff --git a/mock_data/京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip b/mock_data/京东交易流水(申请时间2026年01月26日13时29分47秒)(密码683263)_209.zip new file mode 100644 index 0000000000000000000000000000000000000000..7bae678c59b6b0d698276cc85301877e13ba187d GIT binary patch literal 3807 zcmb`KXE+-SpoNp7R&1)a*51V4RBckbR_#4%r)Jcs#)^s&YHv!YQd??|hAKX4s}(D> zM%8w|@BY94?m0isbDr}&=l^S@Pe4cy0FsaZ+PoEP9Z2I52NVE66Au8O0{{RvJL)#u z@;5umaXA^d&oQ`<&HUS$O*;*(I8@6nsu?T}mf31=7MH-~!@)8*RE31(zXMo)3!We) zyIqA8u$PjM7IhASo;h9V=MFVHJVy>HOE-R4*b(ja2_exbQWh#94Ym&xLW_0d2@a6v zreM7mL5#(BCW-YWzBW%nkr{w_^DHt(j9^HaYRd$ zVkkc@{i65vhs?hvL0LLF#bl@;??uA#_jt!BDb=jkb&cqYRGJH&zn}1%Jo4iKb0xNV zu|n3fYT1?dm#8^5`VR`eB&Trq$ES&@-04k~8fu`AExiy=BxJcK2|o1*rq;vN=$ZE8 zZ9H)lDOUA??xa+C=aQgG`|b)c<$)|cEs%t0IlenL8T&G?Yi9%dcD>1}(B7(?iW#5P z#~}l4qlzqMw_Kf~RLOFCz8bL`41*O!`#uF?=E=>i<_eYF{FFQd(A}0AI(w&do&g8W zKsMKzerU}tm-Nq@Q0jTH6^jmBS?vZCqv<4)U(EE9N_1+rWcbV9&q`Sp%4eRFLbexE z#SXj-$A}^9RT#iYD0lOZNo;>$PkJOX$8ovM6RMCoBfS7Z3vC9*7UvJ647 zWC68rcFL`H5varTzavrM!8XG!czVw2@VZGKiYizRqb6U-y{Wzn3MEGg6>|ESQPB6w zuKC9S&J~HDhb;4&Gu;SNZ3~dBbrhwmXo7Z^$KBV3HclYjMS6Q2Ig_!33q=q3Rl+Cd`J z$M-fvDa)DUqrh&eq6+gX3zvm)oXZCY?MTME$2;4>w#n=DBmzHft+L46z)+mATc8}*a!58iN0SauuAT;@G5P*rF~-7F-~ z5jzYc_S+IDyo%v!jrhk2jX4_J12>Jcda`Hd}VHKTY_ zmy64YU1*9C46dsb@a?bNK1<$j+cx^G!tnZjuWFhA`6sHcHr;(AVeEfdjJKxpT~34U zX*8}m=M)UE7gX~fUes?C@F-=LAuMbiR%3)p63K&NG)GB)qOVZ`ne{lJIc&Nql$`Os z?*4X1U-Ib>q3V`ch!a#KUxG*s^6G+}e*%Ei6S503dpB-S{13ZJSlFEW{TnK`C}xD% zcX-_(Mqd0DDM0HZ9?==S?<_fT-FU373tkMNGT?p6zKHxMWO`(~rB|6Q~ym+`zhU3Q7didytslu0%O^|z`d{&;#-7FE;i5B}2 zlW`o~t|$1ga#NFu06kWUV>mJw09P+BxsUAJ#YDb-_hy+%={a=KZwuxX`T;k zz0kFovb?3Gu>^BBq)6bVxp<@Rn;k)X$blGMNRleMOo1|m{>3G$` zq|8l8Jr3IRlM1A;ncxNyDKeWV?$)W#rCL3O<5j{lyXAb&Z>_3rDCSBNGBqE&p(+vl zg1?4hx&d(DO%>_ktki3GqsKzY#jWlK&A_aqt2+V)jPw(x0x=(Gy`M+2prJm6Tn}f) zNyB$qUuD1(SxEo&p}M#ziv+a}h?-g@pUKmS41f{AGJ5KQfp?icKY;1WJj!yk(+xNA zAwR$E;TO-B;x$jr#ZBKpa-JyeUTZQ`gXH_s!qITXi{vDT9Olt}>v?HnZ7f?-;~~cd zh<;j(xM_@ps#ANNPJnqTWjipobxw(V(J#6o$^V}(YMlzh^FKPEb>>5fxopONM9$0G z*FHkViGGQke(fGLzZp%s-`>*kp=u6Xs}tz8B_&uzyyeQcf21Sq{xG^8IUF za%uO~Xw(7#eI?_636(3S506mdK>#D>1P@fS>P1@cz=|?sG%-fy5d%k@>8Cr58fi!x zyljo?Z4o&Gr~yr@b?eAK;hG-j4Fc`_CEXUuf}j*6II}~@%v6U|m6dXHIjo^3$jT>f zF^!YL?zv5fC6KC6$KBIiNM`7-`!<_})WHOP62!B(;@|MhS=&fAU@VotD%R8& ze}6F|ZqV+_{vUE!YUpffh*W|XgQMfz@0c_c5z1L&-Y%g9m#rA6+=$URe5^5TbuVb-DXt#K~31utU_ zNPrQ>>4JR#9xS_4_WRoxsHi%r;@q*XSJx&w7xqHo{#%UDxmp&=7lqZC#$&yA#Ca{} z2lrYpzWsF>jo?+)4@6WCg@ir>qE{at&UnT(RXzwI2Q=*1sMK`c_`ZCj`ACUk(z9^h zke9807%@!oE`oW1G=M)3GHZ)ywW2xWCc_#_S5&)@ASmR^xG3B&znIpkzePAuXm9yO zSM71_$*F4+_z!DJ?q8lxGygP;xzv?T)k3ly$Vi{lB8&n?=Ye0*uHhl_F2}~*7X<29-kPvRJssSdLtiJ>aXe9GUWDF3l(%1 zTeZ4INX%S;_+Y%jr$v4L;T>yi@*Ax?c4oqS5o}5nfR+W1_o;q@j!I zAFWqm`#1Yb43Zt`} zHc0W>&euPHWC#b0@}g+8bGGPreWBS=(;Oa`6O;zgPY$}R>nK{0J;4eZuup{x7Y(w^ zEs)S~^!4yoZ^W1?e$2-9?BK9F(AA1xziJ(2Uy~?C6NHUWV}{wCN$GOM>vlS;OMAy# zeHH?*XCqA^qpe;Y7&%t%_maZ0TN!PE#fTh z^#W@+KhOF2 zcJ3R=?qHinFqF^wkz!W<-J2;r7{SwcIbr$vGr5piZv7Hta(AaM>r#u%u1*4Zuhf%I ziEfSSiapAnb24*Gwn$q?1FJ!R6ARiQ(jhTKE~O9>kU#MWI% zW(FU*Zw5*{)!pDXV~hBY8LK<5_I9TSAm4f<`{iBTcT65qslZRYRDa zIm7~m^T0X{U!=Rk2a2v>lOjN}uX;d