diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..1ca7f33 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,254 @@ +# AGENTS.md - AI Coding Agent Guidelines + +This document provides guidelines for AI coding agents working on the BillAI project. + +## Project Overview + +BillAI is a microservices-based personal bill analysis system supporting WeChat and Alipay bill parsing, intelligent categorization, and visualization. + +**Architecture:** +- `web/` - Frontend (SvelteKit 5 + TailwindCSS 4.x + TypeScript) +- `server/` - Backend API (Go 1.21 + Gin + MongoDB) +- `analyzer/` - Python analysis service (Python 3.12 + FastAPI) + +## Build, Lint, and Test Commands + +### Frontend (web/) + +```bash +# Development +npm run dev # Start dev server (Vite) + +# Build +npm run build # Production build +npm run preview # Preview production build + +# Type checking +npm run check # svelte-check with TypeScript +npm run check:watch # Watch mode + +# Linting and formatting +npm run lint # Prettier check + ESLint +npm run format # Format with Prettier + +# Testing +npm run test # Run all tests once +npm run test:unit # Run tests in watch mode +npx vitest run src/demo.spec.ts # Run single test file +npx vitest run -t "sum test" # Run tests matching name +npx vitest run src/routes/page.svelte.spec.ts # Run component test +``` + +### Backend (server/) + +```bash +# Run +go run . # Start development server + +# Build +go build . # Build binary + +# Dependencies +go mod download # Install dependencies +go mod tidy # Clean up dependencies + +# Testing (if tests exist) +go test ./... # Run all tests +go test ./handler/... 
# Run tests in specific package +go test -run TestName # Run single test by name +``` + +### Analyzer (analyzer/) + +```bash +# Setup +python -m venv venv +pip install -r requirements.txt + +# Run +python server.py # Start FastAPI server + +# Testing (if tests exist) +pytest # Run all tests +pytest test_file.py # Run single test file +pytest -k "test_name" # Run tests matching name +``` + +### Docker + +```bash +docker-compose up -d --build # Start all services +docker-compose ps # Check service status +docker-compose down # Stop all services +docker-compose logs -f web # Follow logs for specific service +``` + +## Code Style Guidelines + +### TypeScript/Svelte (Frontend) + +**Formatting (Prettier):** +- Use tabs for indentation +- Single quotes for strings +- No trailing commas +- Print width: 100 characters + +**Imports:** +- Use `$lib/` alias for imports from `src/lib/` +- Use `$app/` for SvelteKit internals +- Group imports: external packages, then internal modules + +```typescript +import { browser } from '$app/environment'; +import { auth } from '$lib/stores/auth'; +import type { UIBill } from '$lib/models/bill'; +``` + +**Types:** +- Define interfaces for API responses and requests +- Use `type` for unions and simple type aliases +- Export types from dedicated files in `$lib/types/` or alongside models + +```typescript +export interface UploadResponse { + result: boolean; + message: string; + data?: UploadData; +} +``` + +**Naming Conventions:** +- PascalCase: Components, interfaces, types +- camelCase: Functions, variables, properties +- Use descriptive names: `fetchBills`, `UIBill`, `checkHealth` + +**Error Handling:** +- Wrap API calls in try/catch +- Throw `Error` with HTTP status for API failures +- Handle 401 responses with logout redirect + +```typescript +if (!response.ok) { + throw new Error(`HTTP ${response.status}`); +} +``` + +### Go (Backend) + +**Project Structure:** +- `handler/` - HTTP request handlers +- `service/` - Business logic +- 
`repository/` - Data access layer +- `model/` - Data structures +- `adapter/` - External service integrations +- `config/` - Configuration management +- `middleware/` - Auth and other middleware + +**Naming Conventions:** +- PascalCase: Exported types, functions, constants +- camelCase: Unexported functions, variables +- Use descriptive names: `UpdateBillRequest`, `parseBillTime` + +**Error Handling:** +- Define sentinel errors in `repository/errors.go` +- Return errors up the call stack +- Use structured JSON responses for HTTP errors + +```go +if err == repository.ErrNotFound { + c.JSON(http.StatusNotFound, Response{Result: false, Message: "not found"}) + return +} +``` + +**JSON Tags:** +- Use snake_case for JSON field names +- Use `omitempty` for optional fields +- Match frontend API expectations + +```go +type UpdateBillRequest struct { + Category *string `json:"category,omitempty"` + Amount *float64 `json:"amount,omitempty"` +} +``` + +**Response Format:** +- All API responses use consistent structure: + +```go +type Response struct { + Result bool `json:"result"` + Message string `json:"message,omitempty"` + Data interface{} `json:"data,omitempty"` +} +``` + +### Python (Analyzer) + +**Style:** +- Follow PEP 8 +- Use type hints for function signatures +- Use Pydantic models for request/response validation + +```python +def do_clean( + input_path: str, + output_path: str, + bill_type: str = "auto" +) -> tuple[bool, str, str]: +``` + +**Error Handling:** +- Raise `HTTPException` for API errors +- Use try/except for file operations +- Return structured responses + +```python +if not success: + raise HTTPException(status_code=400, detail=message) +``` + +## Testing Guidelines + +**Frontend Tests:** +- Use Vitest with Playwright for browser testing +- Component tests: `*.svelte.spec.ts` +- Unit tests: `*.spec.ts` +- Tests require assertions: `expect.assertions()` or explicit expects + +```typescript +import { describe, it, expect } from 'vitest'; +import { render 
} from 'vitest-browser-svelte'; + +describe('/+page.svelte', () => { + it('should render h1', async () => { + render(Page); + await expect.element(page.getByRole('heading')).toBeInTheDocument(); + }); +}); +``` + +## Important Patterns + +**API Communication:** +- Frontend proxies API calls through SvelteKit to avoid CORS +- Backend uses Gin framework with JSON responses +- Analyzer communicates via HTTP (preferred) or subprocess + +**Data Flow:** +- Frontend (SvelteKit) -> Backend (Go/Gin) -> MongoDB +- Backend -> Analyzer (Python/FastAPI) for bill parsing + +**Authentication:** +- JWT tokens stored in frontend auth store +- Bearer token sent in Authorization header +- 401 responses trigger logout and redirect + +## File Locations + +- API types: `web/src/lib/api.ts` +- UI models: `web/src/lib/models/` +- Go handlers: `server/handler/` +- Go models: `server/model/` +- Python API: `analyzer/server.py` diff --git a/CHANGELOG.md b/CHANGELOG.md index 90e186e..c23f4d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,27 +5,22 @@ 格式基于 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.0.0/), 版本号遵循 [语义化版本](https://semver.org/lang/zh-CN/)。 -## [1.0.9] - 2026-01-19 - -### 移除 -- **移除 Webhook 自动部署功能** - 删除 webhook 服务及相关文件 - - 删除 `webhook/` 目录(Dockerfile、main.go、go.mod、README.md) - - 删除 `deploy.sh` 部署脚本 - - 删除 `WEBHOOK_SETUP.md` 配置文档 - - 移除 docker-compose.yaml 中的 webhook 服务配置 - -## [1.0.8] - 2026-01-18 - -### 重构 -- **前端账单模型统一为 UIBill** - 分析链路与详情弹窗只使用一套 UI 模型(camelCase + amount:number),移除 BillRecord 混用带来的字段/类型转换散落 - - 分析页、统计服务与各分析组件统一使用 `UIBill[]` - - CSV 解析(下载账单内容)直接输出 `UIBill[]` +## [1.1.0] - 2026-01-23 ### 新增 -- **账单详情弹窗抽象组件** - 新增 `BillDetailDrawer`,复用单笔账单的查看/编辑 UI 结构 +- **ZIP 压缩包上传** - 支持上传加密的 ZIP 压缩包(微信/支付宝导出的原始格式) + - 支持 AES 加密的 ZIP 文件,需输入解压密码 + - 自动将 xlsx 格式转换为 csv + - 自动将 GBK 编码转换为 UTF-8 + - 前端添加密码输入框 -### 优化 -- **前端检查更干净** - 修复图表容器的派生值捕获告警,并为趋势图增加键盘可访问性,`npm run check` 达到 0 warnings +### 修复 +- **支付宝扩展格式解析** - 修复从 ZIP 解压的支付宝账单(含 24 行元数据头)无法解析的问题 +- **CSV 字段数不一致** 
- 修复支付宝 CSV 文件字段数不一致导致解析失败的问题 +- **中文文件名乱码** - 修复 ZIP 内 GBK 编码的中文文件名解压后乱码的问题 + +### 其他 +- 添加 `AGENTS.md` 项目开发指南文档 ## [1.0.7] - 2026-01-16 diff --git a/analyzer/cleaners/alipay.py b/analyzer/cleaners/alipay.py index 95c3b4f..fab7eef 100644 --- a/analyzer/cleaners/alipay.py +++ b/analyzer/cleaners/alipay.py @@ -18,11 +18,31 @@ class AlipayCleaner(BaseCleaner): """执行清理""" self.print_header() - # 读取数据 + # 读取数据,跳过支付宝导出文件的头部信息 with open(self.input_file, "r", encoding="utf-8") as f: reader = csv.reader(f) - header = next(reader) - rows = list(reader) + header = None + rows = [] + + for row in reader: + # 跳过空行 + if not row or not row[0].strip(): + continue + + # 查找实际的CSV头部行(包含"交易时间"和"交易分类") + if header is None: + if len(row) >= 2 and "交易时间" in row[0] and "交易分类" in row[1]: + header = row + continue + # 跳过头部信息行 + continue + + # 收集数据行 + rows.append(row) + + # 确保找到了有效的头部 + if header is None: + raise ValueError("无法找到有效的支付宝账单表头(需包含'交易时间'和'交易分类'列)") self.stats["original_count"] = len(rows) print(f"原始数据行数: {len(rows)}") diff --git a/analyzer/converter.py b/analyzer/converter.py new file mode 100644 index 0000000..de8d608 --- /dev/null +++ b/analyzer/converter.py @@ -0,0 +1,188 @@ +""" +账单文件格式转换模块 + +支持: +- xlsx -> csv 转换 +- GBK/GB2312 -> UTF-8 编码转换 +- 账单类型自动检测 +""" +import os +import csv +import tempfile +from pathlib import Path +from typing import Optional, Tuple + +# 尝试导入 openpyxl,用于读取 xlsx 文件 +try: + from openpyxl import load_workbook + HAS_OPENPYXL = True +except ImportError: + HAS_OPENPYXL = False + + +def detect_encoding(filepath: str) -> str: + """ + 检测文件编码 + + Returns: + 'utf-8', 'gbk', 或 'utf-8-sig' + """ + # 尝试读取前几行来检测编码 + encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'gb18030'] + + for encoding in encodings: + try: + with open(filepath, 'r', encoding=encoding) as f: + # 尝试读取前 10 行 + for _ in range(10): + f.readline() + return encoding + except (UnicodeDecodeError, UnicodeError): + continue + + # 默认使用 gbk + return 'gbk' + + +def 
detect_bill_type_from_content(content: str, filename: str = "") -> str: + """ + 从内容和文件名检测账单类型 + + Returns: + 'alipay', 'wechat', 或 '' + """ + # 从文件名检测 + filename_lower = filename.lower() + if '支付宝' in filename or 'alipay' in filename_lower: + return 'alipay' + if '微信' in filename or 'wechat' in filename_lower: + return 'wechat' + + # 从内容检测 + # 支付宝特征: 有 "交易分类" 和 "对方账号" 列 + if '交易分类' in content and '对方账号' in content: + return 'alipay' + + # 微信特征: 有 "交易类型" 和 "金额(元)" 列 + if '交易类型' in content and '金额(元)' in content: + return 'wechat' + + return '' + + +def convert_xlsx_to_csv(xlsx_path: str, csv_path: str) -> Tuple[bool, str]: + """ + 将 xlsx 文件转换为 csv 文件 + + Returns: + (success, message) + """ + if not HAS_OPENPYXL: + return False, "缺少 openpyxl 库,无法读取 xlsx 文件。请运行: pip install openpyxl" + + try: + wb = load_workbook(xlsx_path, read_only=True, data_only=True) + ws = wb.active + + with open(csv_path, 'w', encoding='utf-8', newline='') as f: + writer = csv.writer(f) + for row in ws.iter_rows(values_only=True): + # 跳过全空行 + if all(cell is None for cell in row): + continue + # 将 None 转换为空字符串 + writer.writerow(['' if cell is None else str(cell) for cell in row]) + + wb.close() + return True, "xlsx 转换成功" + + except Exception as e: + return False, f"xlsx 转换失败: {str(e)}" + + +def convert_csv_encoding(input_path: str, output_path: str, source_encoding: str = 'auto') -> Tuple[bool, str]: + """ + 将 csv 文件从 GBK/其他编码转换为 UTF-8 + + Returns: + (success, message) + """ + if source_encoding == 'auto': + source_encoding = detect_encoding(input_path) + + # 如果已经是 UTF-8,直接复制 + if source_encoding in ('utf-8', 'utf-8-sig'): + if input_path != output_path: + import shutil + shutil.copy(input_path, output_path) + return True, "文件已是 UTF-8 编码" + + try: + with open(input_path, 'r', encoding=source_encoding) as f_in: + content = f_in.read() + + with open(output_path, 'w', encoding='utf-8', newline='') as f_out: + f_out.write(content) + + return True, f"编码转换成功: {source_encoding} -> utf-8" + + except 
Exception as e: + return False, f"编码转换失败: {str(e)}" + + +def convert_bill_file(input_path: str, output_path: Optional[str] = None) -> Tuple[bool, str, str, str]: + """ + 转换账单文件为标准 CSV 格式(UTF-8 编码) + + 支持: + - xlsx -> csv 转换 + - GBK/GB2312 -> UTF-8 编码转换 + + Args: + input_path: 输入文件路径 + output_path: 输出文件路径(可选,默认在同目录生成) + + Returns: + (success, bill_type, output_path, message) + """ + input_path = Path(input_path) + + if not input_path.exists(): + return False, '', '', f"文件不存在: {input_path}" + + # 确定输出路径 + if output_path is None: + # 生成临时文件 + suffix = '.csv' + fd, output_path = tempfile.mkstemp(suffix=suffix) + os.close(fd) + + ext = input_path.suffix.lower() + bill_type = '' + + if ext == '.xlsx': + # xlsx 转换 + success, message = convert_xlsx_to_csv(str(input_path), output_path) + if not success: + return False, '', '', message + + # 读取内容检测账单类型 + with open(output_path, 'r', encoding='utf-8') as f: + content = f.read(2000) # 只读取前 2000 字符用于检测 + bill_type = detect_bill_type_from_content(content, input_path.name) + + elif ext == '.csv': + # CSV 编码转换 + success, message = convert_csv_encoding(str(input_path), output_path) + if not success: + return False, '', '', message + + # 读取内容检测账单类型 + with open(output_path, 'r', encoding='utf-8') as f: + content = f.read(2000) + bill_type = detect_bill_type_from_content(content, input_path.name) + + else: + return False, '', '', f"不支持的文件格式: {ext}" + + return True, bill_type, output_path, "转换成功" diff --git a/analyzer/requirements.txt b/analyzer/requirements.txt index 3dde17e..eda27a3 100644 --- a/analyzer/requirements.txt +++ b/analyzer/requirements.txt @@ -2,3 +2,4 @@ pyyaml>=6.0 fastapi>=0.109.0 uvicorn[standard]>=0.27.0 python-multipart>=0.0.6 +openpyxl>=3.1.0 diff --git a/analyzer/server.py b/analyzer/server.py index 19cca9d..b5451ef 100644 --- a/analyzer/server.py +++ b/analyzer/server.py @@ -24,6 +24,7 @@ if sys.stdout.encoding != 'utf-8': from cleaners.base import compute_date_range_from_values from cleaners import 
AlipayCleaner, WechatCleaner from category import infer_category, get_all_categories, get_all_income_categories +from converter import convert_bill_file # 应用版本 APP_VERSION = "0.0.1" @@ -72,6 +73,14 @@ class HealthResponse(BaseModel): version: str +class ConvertResponse(BaseModel): + """文件转换响应""" + success: bool + bill_type: str + output_path: str + message: str + + # ============================================================================= # 辅助函数 # ============================================================================= @@ -85,7 +94,7 @@ def detect_bill_type(filepath: str) -> str | None: """ try: with open(filepath, "r", encoding="utf-8") as f: - for _ in range(20): + for _ in range(50): # 支付宝账单可能有较多的头部信息行 line = f.readline() if not line: break @@ -337,6 +346,43 @@ async def detect_bill_type_api(file: UploadFile = File(...)): os.unlink(tmp_path) +@app.post("/convert", response_model=ConvertResponse) +async def convert_bill_file_api(file: UploadFile = File(...)): + """ + 转换账单文件格式 + + 支持: + - xlsx -> csv 转换 + - GBK/GB2312 -> UTF-8 编码转换 + + 返回转换后的文件路径和检测到的账单类型 + """ + # 保存上传的文件到临时位置 + suffix = Path(file.filename).suffix or ".csv" + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: + shutil.copyfileobj(file.file, tmp) + input_path = tmp.name + + try: + # 调用转换函数 + success, bill_type, output_path, message = convert_bill_file(input_path) + + if not success: + raise HTTPException(status_code=400, detail=message) + + return ConvertResponse( + success=True, + bill_type=bill_type, + output_path=output_path, + message=message + ) + + finally: + # 清理输入临时文件(转换后的输出文件由调用方负责清理) + if os.path.exists(input_path): + os.unlink(input_path) + + # ============================================================================= # 启动入口 # ============================================================================= diff --git a/mock_data/微信支付账单(测试数据密码123456).zip b/mock_data/微信支付账单(测试数据密码123456).zip new file mode 100644 index 0000000..9274c2c Binary files /dev/null 
and b/mock_data/微信支付账单(测试数据密码123456).zip differ diff --git a/mock_data/支付宝交易明细(测试数据密码123456).zip b/mock_data/支付宝交易明细(测试数据密码123456).zip new file mode 100644 index 0000000..10f2845 Binary files /dev/null and b/mock_data/支付宝交易明细(测试数据密码123456).zip differ diff --git a/server/adapter/adapter.go b/server/adapter/adapter.go index b061304..ca1649d 100644 --- a/server/adapter/adapter.go +++ b/server/adapter/adapter.go @@ -17,6 +17,12 @@ type CleanResult struct { Output string // 脚本输出信息 } +// ConvertResult 格式转换结果 +type ConvertResult struct { + OutputPath string // 转换后的文件路径 + BillType string // 检测到的账单类型: alipay/wechat +} + // Cleaner 账单清洗器接口 // 负责将原始账单数据清洗为标准格式 type Cleaner interface { @@ -25,4 +31,9 @@ type Cleaner interface { // outputPath: 输出文件路径 // opts: 清洗选项 Clean(inputPath, outputPath string, opts *CleanOptions) (*CleanResult, error) + + // Convert 转换账单文件格式(xlsx -> csv,处理 GBK 编码等) + // inputPath: 输入文件路径 + // 返回: 转换后的文件路径, 检测到的账单类型, 错误 + Convert(inputPath string) (outputPath string, billType string, err error) } diff --git a/server/adapter/http/cleaner.go b/server/adapter/http/cleaner.go index da60cb1..dc96562 100644 --- a/server/adapter/http/cleaner.go +++ b/server/adapter/http/cleaner.go @@ -185,6 +185,88 @@ func (c *Cleaner) downloadFile(remotePath, localPath string) error { return nil } +// ConvertResponse 转换响应 +type ConvertResponse struct { + Success bool `json:"success"` + BillType string `json:"bill_type"` + Message string `json:"message"` + OutputPath string `json:"output_path,omitempty"` +} + +// Convert 转换账单文件格式(xlsx -> csv,处理 GBK 编码等) +func (c *Cleaner) Convert(inputPath string) (outputPath string, billType string, err error) { + // 打开输入文件 + file, err := os.Open(inputPath) + if err != nil { + return "", "", fmt.Errorf("打开文件失败: %w", err) + } + defer file.Close() + + // 创建 multipart form + var body bytes.Buffer + writer := multipart.NewWriter(&body) + + // 添加文件 + part, err := writer.CreateFormFile("file", filepath.Base(inputPath)) + if err != nil { + return "", 
"", fmt.Errorf("创建表单文件失败: %w", err) + } + if _, err := io.Copy(part, file); err != nil { + return "", "", fmt.Errorf("复制文件内容失败: %w", err) + } + writer.Close() + + // 发送转换请求 + fmt.Printf("🌐 调用转换服务: %s/convert\n", c.baseURL) + req, err := http.NewRequest("POST", c.baseURL+"/convert", &body) + if err != nil { + return "", "", fmt.Errorf("创建请求失败: %w", err) + } + req.Header.Set("Content-Type", writer.FormDataContentType()) + + resp, err := c.httpClient.Do(req) + if err != nil { + return "", "", fmt.Errorf("HTTP 请求失败: %w", err) + } + defer resp.Body.Close() + + // 读取响应 + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", "", fmt.Errorf("读取响应失败: %w", err) + } + + // 处理错误响应 + if resp.StatusCode != http.StatusOK { + var errResp ErrorResponse + if err := json.Unmarshal(respBody, &errResp); err == nil { + return "", "", fmt.Errorf("转换失败: %s", errResp.Detail) + } + return "", "", fmt.Errorf("转换失败: HTTP %d - %s", resp.StatusCode, string(respBody)) + } + + // 解析成功响应 + var convertResp ConvertResponse + if err := json.Unmarshal(respBody, &convertResp); err != nil { + return "", "", fmt.Errorf("解析响应失败: %w", err) + } + + // 下载转换后的文件到本地(与输入文件同目录,但扩展名改为 .csv) + localOutputPath := inputPath[:len(inputPath)-len(filepath.Ext(inputPath))] + ".csv" + fmt.Printf(" 下载转换后文件: %s -> %s\n", convertResp.OutputPath, localOutputPath) + if err := c.downloadFile(convertResp.OutputPath, localOutputPath); err != nil { + return "", "", fmt.Errorf("下载转换结果失败: %w", err) + } + + // 验证文件是否存在 + if _, err := os.Stat(localOutputPath); err != nil { + return "", "", fmt.Errorf("下载后文件不存在: %s", localOutputPath) + } + fmt.Printf(" 文件下载成功,已保存到: %s\n", localOutputPath) + + return localOutputPath, convertResp.BillType, nil +} + // HealthCheck 检查 Python 服务健康状态 func (c *Cleaner) HealthCheck() error { resp, err := c.httpClient.Get(c.baseURL + "/health") diff --git a/server/adapter/python/cleaner.go b/server/adapter/python/cleaner.go index d08b4d2..13dfe66 100644 --- a/server/adapter/python/cleaner.go 
+++ b/server/adapter/python/cleaner.go @@ -90,5 +90,11 @@ func detectBillTypeFromOutput(output string) string { return "" } +// Convert 转换账单文件格式(xlsx -> csv,处理 GBK 编码等) +// 子进程模式不支持此功能,请使用 HTTP 模式 +func (c *Cleaner) Convert(inputPath string) (outputPath string, billType string, err error) { + return "", "", fmt.Errorf("子进程模式不支持文件格式转换,请使用 HTTP 模式 (analyzer_mode: http)") +} + // 确保 Cleaner 实现了 adapter.Cleaner 接口 var _ adapter.Cleaner = (*Cleaner)(nil) diff --git a/server/go.mod b/server/go.mod index 0da7bb9..127c341 100644 --- a/server/go.mod +++ b/server/go.mod @@ -4,7 +4,10 @@ go 1.21 require ( github.com/gin-gonic/gin v1.9.1 + github.com/golang-jwt/jwt/v5 v5.3.0 + github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9 go.mongodb.org/mongo-driver v1.13.1 + golang.org/x/text v0.9.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -17,7 +20,6 @@ require ( github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/goccy/go-json v0.10.2 // indirect - github.com/golang-jwt/jwt/v5 v5.3.0 // indirect github.com/golang/snappy v0.0.1 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.13.6 // indirect @@ -39,6 +41,5 @@ require ( golang.org/x/net v0.10.0 // indirect golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect golang.org/x/sys v0.8.0 // indirect - golang.org/x/text v0.9.0 // indirect google.golang.org/protobuf v1.30.0 // indirect ) diff --git a/server/go.sum b/server/go.sum index c229350..2ab1518 100644 --- a/server/go.sum +++ b/server/go.sum @@ -75,6 +75,8 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9 
h1:K8gF0eekWPEX+57l30ixxzGhHH/qscI3JCnuhbN6V4M= +github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9/go.mod h1:9BnoKCcgJ/+SLhfAXj15352hTOuVmG5Gzo8xNRINfqI= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= diff --git a/server/handler/upload.go b/server/handler/upload.go index ca33192..4f34e63 100644 --- a/server/handler/upload.go +++ b/server/handler/upload.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "path/filepath" + "strings" "time" "github.com/gin-gonic/gin" @@ -18,6 +19,8 @@ import ( ) // Upload 处理账单上传和清理请求 +// 支持直接上传 CSV 文件,或上传 ZIP 压缩包(支持密码保护) +// ZIP 包内可以是 CSV 或 XLSX 格式的账单文件 func Upload(c *gin.Context) { // 1. 获取上传的文件 file, header, err := c.Request.FormFile("file") @@ -37,32 +40,12 @@ func Upload(c *gin.Context) { req.Format = "csv" } - // 验证 type 参数 - if req.Type == "" { - c.JSON(http.StatusBadRequest, model.UploadResponse{ - Result: false, - Message: "请指定账单类型 (type: alipay 或 wechat)", - }) - return - } - if req.Type != "alipay" && req.Type != "wechat" { - c.JSON(http.StatusBadRequest, model.UploadResponse{ - Result: false, - Message: "账单类型无效,仅支持 alipay 或 wechat", - }) - return - } - billType := req.Type - - // 3. 保存上传的文件(添加唯一ID避免覆盖) + // 3. 
保存上传的文件 timestamp := time.Now().Format("20060102_150405") uniqueID := generateShortID() - // 获取文件扩展名和基础名 ext := filepath.Ext(header.Filename) baseName := header.Filename[:len(header.Filename)-len(ext)] - - // 文件名格式: 时间戳_唯一ID_原始文件名 inputFileName := fmt.Sprintf("%s_%s_%s%s", timestamp, uniqueID, baseName, ext) uploadDirAbs := config.ResolvePath(config.Global.UploadDir) inputPath := filepath.Join(uploadDirAbs, inputFileName) @@ -76,12 +59,117 @@ func Upload(c *gin.Context) { return } defer dst.Close() - io.Copy(dst, file) + if _, err := io.Copy(dst, file); err != nil { + c.JSON(http.StatusInternalServerError, model.UploadResponse{ + Result: false, + Message: "保存文件失败: " + err.Error(), + }) + return + } + dst.Close() // 关闭文件以便后续处理 - // 4. 对原始数据进行去重检查 + // 4. 处理文件:如果是 ZIP 则解压,否则直接处理 + var billFilePath string + var billType string + var extractedFiles []string + var needConvert bool // 是否需要格式转换(xlsx -> csv) + + if service.IsSupportedArchive(header.Filename) { + // 解压 ZIP 文件 + fmt.Printf("📦 检测到 ZIP 文件,开始解压...\n") + extractResult, err := service.ExtractZip(inputPath, uploadDirAbs, req.Password) + if err != nil { + c.JSON(http.StatusBadRequest, model.UploadResponse{ + Result: false, + Message: "解压失败: " + err.Error(), + }) + return + } + + billFilePath = extractResult.BillFile + extractedFiles = extractResult.ExtractedFiles + + // 使用从文件名检测到的账单类型(如果用户未指定) + if req.Type == "" && extractResult.BillType != "" { + billType = extractResult.BillType + } + + fmt.Printf(" 解压完成,账单文件: %s\n", filepath.Base(billFilePath)) + + // ZIP 中提取的文件需要格式转换(xlsx 需要转 csv,csv 可能需要编码转换) + needConvert = true + } else { + // 直接使用上传的文件 + billFilePath = inputPath + + // 检查是否为 xlsx 格式 + if strings.HasSuffix(strings.ToLower(header.Filename), ".xlsx") { + needConvert = true + } + } + + // 5. 
如果需要格式/编码转换,调用 analyzer 服务 + if needConvert { + fmt.Printf("📊 调用分析服务进行格式/编码转换...\n") + convertedPath, detectedType, err := service.ConvertBillFile(billFilePath) + if err != nil { + // 清理临时文件 + service.CleanupExtractedFiles(extractedFiles) + c.JSON(http.StatusBadRequest, model.UploadResponse{ + Result: false, + Message: "文件转换失败: " + err.Error(), + }) + return + } + // 如果转换后的路径与原路径不同,删除原始文件 + if convertedPath != billFilePath { + os.Remove(billFilePath) + } + billFilePath = convertedPath + + // 使用检测到的账单类型 + if req.Type == "" && detectedType != "" { + billType = detectedType + } + fmt.Printf(" 转换完成: %s\n", filepath.Base(convertedPath)) + } + + // 6. 确定账单类型 + if req.Type != "" { + billType = req.Type + } + if billType == "" { + // 尝试从文件名检测 + fileName := strings.ToLower(filepath.Base(billFilePath)) + if strings.Contains(fileName, "支付宝") || strings.Contains(fileName, "alipay") { + billType = "alipay" + } else if strings.Contains(fileName, "微信") || strings.Contains(fileName, "wechat") { + billType = "wechat" + } + } + if billType == "" { + // 清理临时文件 + service.CleanupExtractedFiles(extractedFiles) + c.JSON(http.StatusBadRequest, model.UploadResponse{ + Result: false, + Message: "无法识别账单类型,请指定 type 参数 (alipay 或 wechat)", + }) + return + } + if billType != "alipay" && billType != "wechat" { + service.CleanupExtractedFiles(extractedFiles) + c.JSON(http.StatusBadRequest, model.UploadResponse{ + Result: false, + Message: "账单类型无效,仅支持 alipay 或 wechat", + }) + return + } + + // 7. 
对原始数据进行去重检查 fmt.Printf("📋 开始去重检查...\n") - dedupResult, dedupErr := service.DeduplicateRawFile(inputPath, timestamp) + dedupResult, dedupErr := service.DeduplicateRawFile(billFilePath, timestamp) if dedupErr != nil { + service.CleanupExtractedFiles(extractedFiles) c.JSON(http.StatusInternalServerError, model.UploadResponse{ Result: false, Message: "去重检查失败: " + dedupErr.Error(), @@ -97,6 +185,7 @@ func Upload(c *gin.Context) { // 如果全部重复,返回提示 if dedupResult.NewCount == 0 { + service.CleanupExtractedFiles(extractedFiles) c.JSON(http.StatusOK, model.UploadResponse{ Result: true, Message: fmt.Sprintf("文件中的 %d 条记录全部已存在,无需重复导入", dedupResult.OriginalCount), @@ -113,7 +202,7 @@ func Upload(c *gin.Context) { // 使用去重后的文件路径进行后续处理 processFilePath := dedupResult.DedupFilePath - // 5. 构建输出文件路径:时间_type_编号 + // 8. 构建输出文件路径 outputExt := ".csv" if req.Format == "json" { outputExt = ".json" @@ -123,7 +212,7 @@ func Upload(c *gin.Context) { outputFileName := fmt.Sprintf("%s_%s_%s%s", timestamp, billType, fileSeq, outputExt) outputPath := filepath.Join(outputDirAbs, outputFileName) - // 6. 执行 Python 清洗脚本 + // 9. 执行 Python 清洗脚本 cleanOpts := &service.CleanOptions{ Year: req.Year, Month: req.Month, @@ -133,6 +222,7 @@ func Upload(c *gin.Context) { } _, cleanErr := service.RunCleanScript(processFilePath, outputPath, cleanOpts) if cleanErr != nil { + service.CleanupExtractedFiles(extractedFiles) c.JSON(http.StatusInternalServerError, model.UploadResponse{ Result: false, Message: cleanErr.Error(), @@ -140,7 +230,7 @@ func Upload(c *gin.Context) { return } - // 7. 将去重后的原始数据存入 MongoDB(原始数据集合) + // 10. 将去重后的原始数据存入 MongoDB rawCount, rawErr := service.SaveRawBillsFromFile(processFilePath, billType, header.Filename, timestamp) if rawErr != nil { fmt.Printf("⚠️ 存储原始数据到 MongoDB 失败: %v\n", rawErr) @@ -148,7 +238,7 @@ func Upload(c *gin.Context) { fmt.Printf("✅ 已存储 %d 条原始账单记录到 MongoDB\n", rawCount) } - // 9. 将清洗后的数据存入 MongoDB(清洗后数据集合) + // 11. 
将清洗后的数据存入 MongoDB cleanedCount, _, cleanedErr := service.SaveCleanedBillsFromFile(outputPath, req.Format, billType, header.Filename, timestamp) if cleanedErr != nil { fmt.Printf("⚠️ 存储清洗后数据到 MongoDB 失败: %v\n", cleanedErr) @@ -156,12 +246,13 @@ func Upload(c *gin.Context) { fmt.Printf("✅ 已存储 %d 条清洗后账单记录到 MongoDB\n", cleanedCount) } - // 10. 清理临时的去重文件(如果生成了的话) + // 12. 清理临时文件 if dedupResult.DedupFilePath != inputPath && dedupResult.DedupFilePath != "" { os.Remove(dedupResult.DedupFilePath) } + service.CleanupExtractedFiles(extractedFiles) - // 11. 返回成功响应 + // 13. 返回成功响应 message := fmt.Sprintf("处理成功,新增 %d 条记录", cleanedCount) if dedupResult.DuplicateCount > 0 { message = fmt.Sprintf("处理成功,新增 %d 条,跳过 %d 条重复记录", cleanedCount, dedupResult.DuplicateCount) @@ -182,7 +273,6 @@ func Upload(c *gin.Context) { } // generateFileSequence 生成文件序号 -// 根据当前目录下同一时间戳和类型的文件数量生成序号 func generateFileSequence(dir, timestamp, billType, ext string) string { pattern := fmt.Sprintf("%s_%s_*%s", timestamp, billType, ext) matches, err := filepath.Glob(filepath.Join(dir, pattern)) @@ -194,9 +284,8 @@ func generateFileSequence(dir, timestamp, billType, ext string) string { // generateShortID 生成 6 位随机唯一标识符 func generateShortID() string { - bytes := make([]byte, 3) // 3 字节 = 6 个十六进制字符 + bytes := make([]byte, 3) if _, err := rand.Read(bytes); err != nil { - // 如果随机数生成失败,使用时间纳秒作为备选 return fmt.Sprintf("%06x", time.Now().UnixNano()%0xFFFFFF) } return hex.EncodeToString(bytes) diff --git a/server/model/request.go b/server/model/request.go index 70dccd2..b501504 100644 --- a/server/model/request.go +++ b/server/model/request.go @@ -2,10 +2,11 @@ package model // UploadRequest 上传请求参数 type UploadRequest struct { - Type string `form:"type"` // 账单类型: alipay/wechat(必填) - Year string `form:"year"` // 年份筛选 - Month string `form:"month"` // 月份筛选 - Start string `form:"start"` // 起始日期 - End string `form:"end"` // 结束日期 - Format string `form:"format"` // 输出格式: csv/json + Type string `form:"type"` // 账单类型: 
alipay/wechat(可选,会自动检测)
+	Password string `form:"password"` // ZIP 文件密码(可选)
+	Year     string `form:"year"`     // 年份筛选
+	Month    string `form:"month"`    // 月份筛选
+	Start    string `form:"start"`    // 起始日期
+	End      string `form:"end"`      // 结束日期
+	Format   string `form:"format"`   // 输出格式: csv/json
 }
diff --git a/server/service/archive.go b/server/service/archive.go
new file mode 100644
index 0000000..3112ff9
--- /dev/null
+++ b/server/service/archive.go
@@ -0,0 +1,185 @@
+package service
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+	"unicode/utf8"
+
+	"github.com/yeka/zip"
+	"golang.org/x/text/encoding/simplifiedchinese"
+	"golang.org/x/text/transform"
+)
+
+// ExtractResult describes what ExtractZip wrote to disk.
+type ExtractResult struct {
+	ExtractedFiles []string // paths of every file written during extraction
+	BillFile       string   // path of the extracted bill file (csv or xlsx)
+	BillType       string   // "alipay" or "wechat"; empty when the name gave no hint
+}
+
+// ExtractZip extracts the regular files of the ZIP archive at zipPath into
+// destDir, using password for encrypted entries.
+//
+// Entries are written under generated names of the form
+// "extracted_<timestamp>_<index><ext>" instead of their archive names, which
+// keeps every output inside destDir and avoids file-name encoding problems.
+//
+// An error is returned when the archive cannot be opened, when an encrypted
+// entry is found but password is empty, when an entry cannot be extracted, or
+// when no .csv/.xlsx entry exists. Files already written are removed before
+// an error is returned, because the caller gets a nil result and cannot clean
+// them up itself.
+func ExtractZip(zipPath, destDir, password string) (*ExtractResult, error) {
+	reader, err := zip.OpenReader(zipPath)
+	if err != nil {
+		return nil, fmt.Errorf("无法打开 ZIP 文件: %w", err)
+	}
+	defer reader.Close()
+
+	result := &ExtractResult{
+		ExtractedFiles: make([]string, 0),
+	}
+
+	// Roll back partial output on every early return below.
+	succeeded := false
+	defer func() {
+		if !succeeded {
+			CleanupExtractedFiles(result.ExtractedFiles)
+		}
+	}()
+
+	timestamp := time.Now().Format("20060102_150405")
+
+	for _, file := range reader.File {
+		// Output names are flat, so directory entries need no counterpart on
+		// disk. Creating one would also occupy the generated name that the
+		// next extension-less file receives and make its creation fail.
+		if file.FileInfo().IsDir() {
+			continue
+		}
+
+		// Entry names may be GBK-encoded.
+		fileName := decodeFileName(file.Name)
+
+		// Skip entries whose name looks like a path traversal attempt.
+		if strings.Contains(fileName, "..") {
+			continue
+		}
+
+		if file.IsEncrypted() {
+			if password == "" {
+				return nil, fmt.Errorf("ZIP 文件已加密,请提供密码")
+			}
+			file.SetPassword(password)
+		}
+
+		// Only the lower-cased extension of the archive name is kept.
+		ext := strings.ToLower(filepath.Ext(fileName))
+		safeFileName := fmt.Sprintf("extracted_%s_%d%s", timestamp, len(result.ExtractedFiles), ext)
+		destPath := filepath.Join(destDir, safeFileName)
+
+		if err := extractZipEntry(file, fileName, destPath); err != nil {
+			return nil, err
+		}
+		result.ExtractedFiles = append(result.ExtractedFiles, destPath)
+
+		if ext == ".csv" || ext == ".xlsx" {
+			// When several bill files exist, the last one wins.
+			result.BillFile = destPath
+
+			// The bill type is guessed from the original entry name.
+			lowerName := strings.ToLower(fileName)
+			if strings.Contains(fileName, "支付宝") || strings.Contains(lowerName, "alipay") {
+				result.BillType = "alipay"
+			} else if strings.Contains(fileName, "微信") || strings.Contains(lowerName, "wechat") {
+				result.BillType = "wechat"
+			}
+		}
+	}
+
+	if result.BillFile == "" {
+		return nil, fmt.Errorf("ZIP 文件中未找到账单文件(.csv 或 .xlsx)")
+	}
+
+	succeeded = true
+	return result, nil
+}
+
+// extractZipEntry writes the content of one archive entry to destPath.
+// displayName is the decoded entry name and only appears in error messages.
+// The destination file is removed again if it cannot be written completely.
+func extractZipEntry(file *zip.File, displayName, destPath string) error {
+	if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
+		return fmt.Errorf("创建目录失败: %w", err)
+	}
+
+	rc, err := file.Open()
+	if err != nil {
+		if file.IsEncrypted() {
+			return fmt.Errorf("密码错误或无法解密文件")
+		}
+		return fmt.Errorf("无法读取文件 %s: %w", displayName, err)
+	}
+	defer rc.Close()
+
+	destFile, err := os.Create(destPath)
+	if err != nil {
+		return fmt.Errorf("创建文件失败: %w", err)
+	}
+
+	_, writeErr := io.Copy(destFile, rc)
+	// A failed Close can mean buffered data never reached the disk.
+	if closeErr := destFile.Close(); writeErr == nil {
+		writeErr = closeErr
+	}
+	if writeErr != nil {
+		os.Remove(destPath)
+		return fmt.Errorf("写入文件失败: %w", writeErr)
+	}
+	return nil
+}
+
+// decodeFileName returns a ZIP entry name as UTF-8.
+//
+// Names that are already valid UTF-8 (pure ASCII included) are returned
+// unchanged; running them through the GBK decoder would corrupt them.
+// Any other name is assumed to be GBK, which ZIP files created on Windows
+// commonly use for Chinese file names. If decoding fails or yields nothing,
+// the name is returned as it was.
+func decodeFileName(name string) string {
+	if utf8.ValidString(name) {
+		return name
+	}
+
+	decoded, _, err := transform.String(simplifiedchinese.GBK.NewDecoder(), name)
+	if err == nil && len(decoded) > 0 {
+		return decoded
+	}
+	return name
+}
+
+// IsSupportedArchive reports whether filename has a supported archive
+// extension (currently only .zip, compared case-insensitively).
+func IsSupportedArchive(filename string) bool {
+	lower := strings.ToLower(filename)
+	return strings.HasSuffix(lower, ".zip")
+}
+
+// IsBillFile reports whether filename has a bill file extension (.csv or
+// .xlsx, compared case-insensitively).
+func IsBillFile(filename string) bool {
+	lower := strings.ToLower(filename)
+	return strings.HasSuffix(lower, ".csv") || strings.HasSuffix(lower, ".xlsx")
+}
+
+// CleanupExtractedFiles removes the given temporary files. Removal is
+// best-effort: errors are ignored.
+func CleanupExtractedFiles(files []string) {
+	for _, f := range files {
+		os.Remove(f)
+	}
+}
diff --git
a/server/service/bill.go b/server/service/bill.go index 192a4c4..515ab31 100644 --- a/server/service/bill.go +++ b/server/service/bill.go @@ -47,6 +47,7 @@ func DeduplicateRawFile(filePath, uploadBatch string) (*DeduplicateResult, error defer file.Close() reader := csv.NewReader(file) + reader.FieldsPerRecord = -1 // 允许变长记录 rows, err := reader.ReadAll() if err != nil { return nil, fmt.Errorf("读取 CSV 失败: %w", err) @@ -183,6 +184,7 @@ func SaveRawBillsFromFile(filePath, billType, sourceFile, uploadBatch string) (i defer file.Close() reader := csv.NewReader(file) + reader.FieldsPerRecord = -1 // 允许变长记录 rows, err := reader.ReadAll() if err != nil { return 0, fmt.Errorf("读取 CSV 失败: %w", err) @@ -249,6 +251,7 @@ func saveCleanedBillsFromCSV(filePath, billType, sourceFile, uploadBatch string) defer file.Close() reader := csv.NewReader(file) + reader.FieldsPerRecord = -1 // 允许变长记录 rows, err := reader.ReadAll() if err != nil { return 0, 0, fmt.Errorf("读取 CSV 失败: %w", err) diff --git a/server/service/cleaner.go b/server/service/cleaner.go index 691e039..e0b6c1b 100644 --- a/server/service/cleaner.go +++ b/server/service/cleaner.go @@ -20,6 +20,13 @@ func RunCleanScript(inputPath, outputPath string, opts *CleanOptions) (*CleanRes return cleaner.Clean(inputPath, outputPath, opts) } +// ConvertBillFile 转换账单文件格式(xlsx -> csv,处理编码) +// 返回转换后的文件路径和检测到的账单类型 +func ConvertBillFile(inputPath string) (outputPath string, billType string, err error) { + cleaner := adapter.GetCleaner() + return cleaner.Convert(inputPath) +} + // DetectBillTypeFromOutput 从脚本输出中检测账单类型 // 保留此函数以兼容其他调用 func DetectBillTypeFromOutput(output string) string { diff --git a/server/service/extractor.go b/server/service/extractor.go index a37ddeb..e7ae37e 100644 --- a/server/service/extractor.go +++ b/server/service/extractor.go @@ -27,6 +27,7 @@ func extractFromCSV(filePath string) []model.ReviewRecord { defer file.Close() reader := csv.NewReader(file) + reader.FieldsPerRecord = -1 // 允许变长记录 rows, err := 
reader.ReadAll() if err != nil || len(rows) < 2 { return records diff --git a/web/package.json b/web/package.json index 8238068..16379fb 100644 --- a/web/package.json +++ b/web/package.json @@ -1,7 +1,7 @@ { "name": "web", "private": true, - "version": "1.0.9", + "version": "1.1.0", "type": "module", "scripts": { "dev": "vite dev", diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index dd7112a..aaad449 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -100,7 +100,7 @@ export interface MonthlyStatsResponse { export async function uploadBill( file: File, type: BillType, - options?: { year?: number; month?: number } + options?: { year?: number; month?: number; password?: string } ): Promise { const formData = new FormData(); formData.append('file', file); @@ -112,6 +112,9 @@ export async function uploadBill( if (options?.month) { formData.append('month', options.month.toString()); } + if (options?.password) { + formData.append('password', options.password); + } const response = await apiFetch(`${API_BASE}/api/upload`, { method: 'POST', diff --git a/web/src/routes/+page.svelte b/web/src/routes/+page.svelte index 4451522..f49c25c 100644 --- a/web/src/routes/+page.svelte +++ b/web/src/routes/+page.svelte @@ -23,6 +23,8 @@ let isUploading = $state(false); let uploadResult: UploadResponse | null = $state(null); let errorMessage = $state(''); + let zipPassword = $state(''); + let isZipFile = $state(false); type StatTrend = 'up' | 'down'; interface StatCard { @@ -186,16 +188,27 @@ } function selectFile(file: File) { - if (!file.name.endsWith('.csv')) { - errorMessage = '请选择 CSV 格式的账单文件'; + const fileName = file.name.toLowerCase(); + const isZip = fileName.endsWith('.zip'); + const isCsv = fileName.endsWith('.csv'); + const isXlsx = fileName.endsWith('.xlsx'); + + if (!isCsv && !isZip && !isXlsx) { + errorMessage = '请选择 CSV、XLSX 或 ZIP 格式的账单文件'; return; } + selectedFile = file; + isZipFile = isZip; errorMessage = ''; uploadResult = null; + // 如果不是 ZIP 文件,清空密码 + 
if (!isZip) { + zipPassword = ''; + } + // 根据文件名自动识别账单类型 - const fileName = file.name.toLowerCase(); if (fileName.includes('支付宝') || fileName.includes('alipay')) { selectedType = 'alipay'; } else if (fileName.includes('微信') || fileName.includes('wechat')) { @@ -207,6 +220,8 @@ selectedFile = null; uploadResult = null; errorMessage = ''; + zipPassword = ''; + isZipFile = false; } async function handleUpload() { @@ -216,7 +231,11 @@ errorMessage = ''; try { - const result = await uploadBill(selectedFile, selectedType); + const options: { year?: number; month?: number; password?: string } = {}; + if (isZipFile && zipPassword) { + options.password = zipPassword; + } + const result = await uploadBill(selectedFile, selectedType, options); if (result.result) { uploadResult = result; } else { @@ -278,7 +297,7 @@
上传账单 - 支持支付宝、微信账单 CSV 文件 + 支持支付宝、微信账单 CSV、XLSX 或 ZIP 文件