feat: 支持ZIP压缩包上传(含密码保护)
This commit is contained in:
@@ -17,6 +17,12 @@ type CleanResult struct {
|
||||
Output string // 脚本输出信息
|
||||
}
|
||||
|
||||
// ConvertResult holds the outcome of a bill file format conversion.
type ConvertResult struct {
	// OutputPath is the path of the converted file.
	OutputPath string
	// BillType is the detected bill type: alipay or wechat.
	BillType string
}
|
||||
|
||||
// Cleaner 账单清洗器接口
|
||||
// 负责将原始账单数据清洗为标准格式
|
||||
type Cleaner interface {
|
||||
@@ -25,4 +31,9 @@ type Cleaner interface {
|
||||
// outputPath: 输出文件路径
|
||||
// opts: 清洗选项
|
||||
Clean(inputPath, outputPath string, opts *CleanOptions) (*CleanResult, error)
|
||||
|
||||
// Convert 转换账单文件格式(xlsx -> csv,处理 GBK 编码等)
|
||||
// inputPath: 输入文件路径
|
||||
// 返回: 转换后的文件路径, 检测到的账单类型, 错误
|
||||
Convert(inputPath string) (outputPath string, billType string, err error)
|
||||
}
|
||||
|
||||
@@ -185,6 +185,88 @@ func (c *Cleaner) downloadFile(remotePath, localPath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ConvertResponse is the JSON payload decoded from a successful /convert call.
type ConvertResponse struct {
	// Success is decoded from the "success" key.
	Success bool `json:"success"`
	// BillType is decoded from the "bill_type" key.
	BillType string `json:"bill_type"`
	// Message is decoded from the "message" key.
	Message string `json:"message"`
	// OutputPath is decoded from the "output_path" key; omitted when empty
	// on encoding.
	OutputPath string `json:"output_path,omitempty"`
}
|
||||
|
||||
// Convert 转换账单文件格式(xlsx -> csv,处理 GBK 编码等)
|
||||
func (c *Cleaner) Convert(inputPath string) (outputPath string, billType string, err error) {
|
||||
// 打开输入文件
|
||||
file, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("打开文件失败: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// 创建 multipart form
|
||||
var body bytes.Buffer
|
||||
writer := multipart.NewWriter(&body)
|
||||
|
||||
// 添加文件
|
||||
part, err := writer.CreateFormFile("file", filepath.Base(inputPath))
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("创建表单文件失败: %w", err)
|
||||
}
|
||||
if _, err := io.Copy(part, file); err != nil {
|
||||
return "", "", fmt.Errorf("复制文件内容失败: %w", err)
|
||||
}
|
||||
writer.Close()
|
||||
|
||||
// 发送转换请求
|
||||
fmt.Printf("🌐 调用转换服务: %s/convert\n", c.baseURL)
|
||||
req, err := http.NewRequest("POST", c.baseURL+"/convert", &body)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("创建请求失败: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", writer.FormDataContentType())
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("HTTP 请求失败: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// 读取响应
|
||||
respBody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("读取响应失败: %w", err)
|
||||
}
|
||||
|
||||
// 处理错误响应
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
var errResp ErrorResponse
|
||||
if err := json.Unmarshal(respBody, &errResp); err == nil {
|
||||
return "", "", fmt.Errorf("转换失败: %s", errResp.Detail)
|
||||
}
|
||||
return "", "", fmt.Errorf("转换失败: HTTP %d - %s", resp.StatusCode, string(respBody))
|
||||
}
|
||||
|
||||
// 解析成功响应
|
||||
var convertResp ConvertResponse
|
||||
if err := json.Unmarshal(respBody, &convertResp); err != nil {
|
||||
return "", "", fmt.Errorf("解析响应失败: %w", err)
|
||||
}
|
||||
|
||||
// 下载转换后的文件到本地(与输入文件同目录,但扩展名改为 .csv)
|
||||
localOutputPath := inputPath[:len(inputPath)-len(filepath.Ext(inputPath))] + ".csv"
|
||||
fmt.Printf(" 下载转换后文件: %s -> %s\n", convertResp.OutputPath, localOutputPath)
|
||||
if err := c.downloadFile(convertResp.OutputPath, localOutputPath); err != nil {
|
||||
return "", "", fmt.Errorf("下载转换结果失败: %w", err)
|
||||
}
|
||||
|
||||
// 验证文件是否存在
|
||||
if _, err := os.Stat(localOutputPath); err != nil {
|
||||
return "", "", fmt.Errorf("下载后文件不存在: %s", localOutputPath)
|
||||
}
|
||||
fmt.Printf(" 文件下载成功,已保存到: %s\n", localOutputPath)
|
||||
|
||||
return localOutputPath, convertResp.BillType, nil
|
||||
}
|
||||
|
||||
// HealthCheck 检查 Python 服务健康状态
|
||||
func (c *Cleaner) HealthCheck() error {
|
||||
resp, err := c.httpClient.Get(c.baseURL + "/health")
|
||||
|
||||
@@ -90,5 +90,11 @@ func detectBillTypeFromOutput(output string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Convert 转换账单文件格式(xlsx -> csv,处理 GBK 编码等)
|
||||
// 子进程模式不支持此功能,请使用 HTTP 模式
|
||||
func (c *Cleaner) Convert(inputPath string) (outputPath string, billType string, err error) {
|
||||
return "", "", fmt.Errorf("子进程模式不支持文件格式转换,请使用 HTTP 模式 (analyzer_mode: http)")
|
||||
}
|
||||
|
||||
// 确保 Cleaner 实现了 adapter.Cleaner 接口
|
||||
var _ adapter.Cleaner = (*Cleaner)(nil)
|
||||
|
||||
@@ -4,7 +4,10 @@ go 1.21
|
||||
|
||||
require (
|
||||
github.com/gin-gonic/gin v1.9.1
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0
|
||||
github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9
|
||||
go.mongodb.org/mongo-driver v1.13.1
|
||||
golang.org/x/text v0.9.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
@@ -17,7 +20,6 @@ require (
|
||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||
github.com/go-playground/validator/v10 v10.14.0 // indirect
|
||||
github.com/goccy/go-json v0.10.2 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
|
||||
github.com/golang/snappy v0.0.1 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/compress v1.13.6 // indirect
|
||||
@@ -39,6 +41,5 @@ require (
|
||||
golang.org/x/net v0.10.0 // indirect
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect
|
||||
golang.org/x/sys v0.8.0 // indirect
|
||||
golang.org/x/text v0.9.0 // indirect
|
||||
google.golang.org/protobuf v1.30.0 // indirect
|
||||
)
|
||||
|
||||
@@ -75,6 +75,8 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
|
||||
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
|
||||
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
|
||||
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
|
||||
github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9 h1:K8gF0eekWPEX+57l30ixxzGhHH/qscI3JCnuhbN6V4M=
|
||||
github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9/go.mod h1:9BnoKCcgJ/+SLhfAXj15352hTOuVmG5Gzo8xNRINfqI=
|
||||
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA=
|
||||
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
@@ -18,6 +19,8 @@ import (
|
||||
)
|
||||
|
||||
// Upload 处理账单上传和清理请求
|
||||
// 支持直接上传 CSV 文件,或上传 ZIP 压缩包(支持密码保护)
|
||||
// ZIP 包内可以是 CSV 或 XLSX 格式的账单文件
|
||||
func Upload(c *gin.Context) {
|
||||
// 1. 获取上传的文件
|
||||
file, header, err := c.Request.FormFile("file")
|
||||
@@ -37,32 +40,12 @@ func Upload(c *gin.Context) {
|
||||
req.Format = "csv"
|
||||
}
|
||||
|
||||
// 验证 type 参数
|
||||
if req.Type == "" {
|
||||
c.JSON(http.StatusBadRequest, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "请指定账单类型 (type: alipay 或 wechat)",
|
||||
})
|
||||
return
|
||||
}
|
||||
if req.Type != "alipay" && req.Type != "wechat" {
|
||||
c.JSON(http.StatusBadRequest, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "账单类型无效,仅支持 alipay 或 wechat",
|
||||
})
|
||||
return
|
||||
}
|
||||
billType := req.Type
|
||||
|
||||
// 3. 保存上传的文件(添加唯一ID避免覆盖)
|
||||
// 3. 保存上传的文件
|
||||
timestamp := time.Now().Format("20060102_150405")
|
||||
uniqueID := generateShortID()
|
||||
|
||||
// 获取文件扩展名和基础名
|
||||
ext := filepath.Ext(header.Filename)
|
||||
baseName := header.Filename[:len(header.Filename)-len(ext)]
|
||||
|
||||
// 文件名格式: 时间戳_唯一ID_原始文件名
|
||||
inputFileName := fmt.Sprintf("%s_%s_%s%s", timestamp, uniqueID, baseName, ext)
|
||||
uploadDirAbs := config.ResolvePath(config.Global.UploadDir)
|
||||
inputPath := filepath.Join(uploadDirAbs, inputFileName)
|
||||
@@ -76,12 +59,117 @@ func Upload(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
defer dst.Close()
|
||||
io.Copy(dst, file)
|
||||
if _, err := io.Copy(dst, file); err != nil {
|
||||
c.JSON(http.StatusInternalServerError, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "保存文件失败: " + err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
dst.Close() // 关闭文件以便后续处理
|
||||
|
||||
// 4. 对原始数据进行去重检查
|
||||
// 4. 处理文件:如果是 ZIP 则解压,否则直接处理
|
||||
var billFilePath string
|
||||
var billType string
|
||||
var extractedFiles []string
|
||||
var needConvert bool // 是否需要格式转换(xlsx -> csv)
|
||||
|
||||
if service.IsSupportedArchive(header.Filename) {
|
||||
// 解压 ZIP 文件
|
||||
fmt.Printf("📦 检测到 ZIP 文件,开始解压...\n")
|
||||
extractResult, err := service.ExtractZip(inputPath, uploadDirAbs, req.Password)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "解压失败: " + err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
billFilePath = extractResult.BillFile
|
||||
extractedFiles = extractResult.ExtractedFiles
|
||||
|
||||
// 使用从文件名检测到的账单类型(如果用户未指定)
|
||||
if req.Type == "" && extractResult.BillType != "" {
|
||||
billType = extractResult.BillType
|
||||
}
|
||||
|
||||
fmt.Printf(" 解压完成,账单文件: %s\n", filepath.Base(billFilePath))
|
||||
|
||||
// ZIP 中提取的文件需要格式转换(xlsx 需要转 csv,csv 可能需要编码转换)
|
||||
needConvert = true
|
||||
} else {
|
||||
// 直接使用上传的文件
|
||||
billFilePath = inputPath
|
||||
|
||||
// 检查是否为 xlsx 格式
|
||||
if strings.HasSuffix(strings.ToLower(header.Filename), ".xlsx") {
|
||||
needConvert = true
|
||||
}
|
||||
}
|
||||
|
||||
// 5. 如果需要格式/编码转换,调用 analyzer 服务
|
||||
if needConvert {
|
||||
fmt.Printf("📊 调用分析服务进行格式/编码转换...\n")
|
||||
convertedPath, detectedType, err := service.ConvertBillFile(billFilePath)
|
||||
if err != nil {
|
||||
// 清理临时文件
|
||||
service.CleanupExtractedFiles(extractedFiles)
|
||||
c.JSON(http.StatusBadRequest, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "文件转换失败: " + err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
// 如果转换后的路径与原路径不同,删除原始文件
|
||||
if convertedPath != billFilePath {
|
||||
os.Remove(billFilePath)
|
||||
}
|
||||
billFilePath = convertedPath
|
||||
|
||||
// 使用检测到的账单类型
|
||||
if req.Type == "" && detectedType != "" {
|
||||
billType = detectedType
|
||||
}
|
||||
fmt.Printf(" 转换完成: %s\n", filepath.Base(convertedPath))
|
||||
}
|
||||
|
||||
// 6. 确定账单类型
|
||||
if req.Type != "" {
|
||||
billType = req.Type
|
||||
}
|
||||
if billType == "" {
|
||||
// 尝试从文件名检测
|
||||
fileName := strings.ToLower(filepath.Base(billFilePath))
|
||||
if strings.Contains(fileName, "支付宝") || strings.Contains(fileName, "alipay") {
|
||||
billType = "alipay"
|
||||
} else if strings.Contains(fileName, "微信") || strings.Contains(fileName, "wechat") {
|
||||
billType = "wechat"
|
||||
}
|
||||
}
|
||||
if billType == "" {
|
||||
// 清理临时文件
|
||||
service.CleanupExtractedFiles(extractedFiles)
|
||||
c.JSON(http.StatusBadRequest, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "无法识别账单类型,请指定 type 参数 (alipay 或 wechat)",
|
||||
})
|
||||
return
|
||||
}
|
||||
if billType != "alipay" && billType != "wechat" {
|
||||
service.CleanupExtractedFiles(extractedFiles)
|
||||
c.JSON(http.StatusBadRequest, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "账单类型无效,仅支持 alipay 或 wechat",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// 7. 对原始数据进行去重检查
|
||||
fmt.Printf("📋 开始去重检查...\n")
|
||||
dedupResult, dedupErr := service.DeduplicateRawFile(inputPath, timestamp)
|
||||
dedupResult, dedupErr := service.DeduplicateRawFile(billFilePath, timestamp)
|
||||
if dedupErr != nil {
|
||||
service.CleanupExtractedFiles(extractedFiles)
|
||||
c.JSON(http.StatusInternalServerError, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: "去重检查失败: " + dedupErr.Error(),
|
||||
@@ -97,6 +185,7 @@ func Upload(c *gin.Context) {
|
||||
|
||||
// 如果全部重复,返回提示
|
||||
if dedupResult.NewCount == 0 {
|
||||
service.CleanupExtractedFiles(extractedFiles)
|
||||
c.JSON(http.StatusOK, model.UploadResponse{
|
||||
Result: true,
|
||||
Message: fmt.Sprintf("文件中的 %d 条记录全部已存在,无需重复导入", dedupResult.OriginalCount),
|
||||
@@ -113,7 +202,7 @@ func Upload(c *gin.Context) {
|
||||
// 使用去重后的文件路径进行后续处理
|
||||
processFilePath := dedupResult.DedupFilePath
|
||||
|
||||
// 5. 构建输出文件路径:时间_type_编号
|
||||
// 8. 构建输出文件路径
|
||||
outputExt := ".csv"
|
||||
if req.Format == "json" {
|
||||
outputExt = ".json"
|
||||
@@ -123,7 +212,7 @@ func Upload(c *gin.Context) {
|
||||
outputFileName := fmt.Sprintf("%s_%s_%s%s", timestamp, billType, fileSeq, outputExt)
|
||||
outputPath := filepath.Join(outputDirAbs, outputFileName)
|
||||
|
||||
// 6. 执行 Python 清洗脚本
|
||||
// 9. 执行 Python 清洗脚本
|
||||
cleanOpts := &service.CleanOptions{
|
||||
Year: req.Year,
|
||||
Month: req.Month,
|
||||
@@ -133,6 +222,7 @@ func Upload(c *gin.Context) {
|
||||
}
|
||||
_, cleanErr := service.RunCleanScript(processFilePath, outputPath, cleanOpts)
|
||||
if cleanErr != nil {
|
||||
service.CleanupExtractedFiles(extractedFiles)
|
||||
c.JSON(http.StatusInternalServerError, model.UploadResponse{
|
||||
Result: false,
|
||||
Message: cleanErr.Error(),
|
||||
@@ -140,7 +230,7 @@ func Upload(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 7. 将去重后的原始数据存入 MongoDB(原始数据集合)
|
||||
// 10. 将去重后的原始数据存入 MongoDB
|
||||
rawCount, rawErr := service.SaveRawBillsFromFile(processFilePath, billType, header.Filename, timestamp)
|
||||
if rawErr != nil {
|
||||
fmt.Printf("⚠️ 存储原始数据到 MongoDB 失败: %v\n", rawErr)
|
||||
@@ -148,7 +238,7 @@ func Upload(c *gin.Context) {
|
||||
fmt.Printf("✅ 已存储 %d 条原始账单记录到 MongoDB\n", rawCount)
|
||||
}
|
||||
|
||||
// 9. 将清洗后的数据存入 MongoDB(清洗后数据集合)
|
||||
// 11. 将清洗后的数据存入 MongoDB
|
||||
cleanedCount, _, cleanedErr := service.SaveCleanedBillsFromFile(outputPath, req.Format, billType, header.Filename, timestamp)
|
||||
if cleanedErr != nil {
|
||||
fmt.Printf("⚠️ 存储清洗后数据到 MongoDB 失败: %v\n", cleanedErr)
|
||||
@@ -156,12 +246,13 @@ func Upload(c *gin.Context) {
|
||||
fmt.Printf("✅ 已存储 %d 条清洗后账单记录到 MongoDB\n", cleanedCount)
|
||||
}
|
||||
|
||||
// 10. 清理临时的去重文件(如果生成了的话)
|
||||
// 12. 清理临时文件
|
||||
if dedupResult.DedupFilePath != inputPath && dedupResult.DedupFilePath != "" {
|
||||
os.Remove(dedupResult.DedupFilePath)
|
||||
}
|
||||
service.CleanupExtractedFiles(extractedFiles)
|
||||
|
||||
// 11. 返回成功响应
|
||||
// 13. 返回成功响应
|
||||
message := fmt.Sprintf("处理成功,新增 %d 条记录", cleanedCount)
|
||||
if dedupResult.DuplicateCount > 0 {
|
||||
message = fmt.Sprintf("处理成功,新增 %d 条,跳过 %d 条重复记录", cleanedCount, dedupResult.DuplicateCount)
|
||||
@@ -182,7 +273,6 @@ func Upload(c *gin.Context) {
|
||||
}
|
||||
|
||||
// generateFileSequence 生成文件序号
|
||||
// 根据当前目录下同一时间戳和类型的文件数量生成序号
|
||||
func generateFileSequence(dir, timestamp, billType, ext string) string {
|
||||
pattern := fmt.Sprintf("%s_%s_*%s", timestamp, billType, ext)
|
||||
matches, err := filepath.Glob(filepath.Join(dir, pattern))
|
||||
@@ -194,9 +284,8 @@ func generateFileSequence(dir, timestamp, billType, ext string) string {
|
||||
|
||||
// generateShortID 生成 6 位随机唯一标识符
|
||||
func generateShortID() string {
|
||||
bytes := make([]byte, 3) // 3 字节 = 6 个十六进制字符
|
||||
bytes := make([]byte, 3)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
// 如果随机数生成失败,使用时间纳秒作为备选
|
||||
return fmt.Sprintf("%06x", time.Now().UnixNano()%0xFFFFFF)
|
||||
}
|
||||
return hex.EncodeToString(bytes)
|
||||
|
||||
@@ -2,10 +2,11 @@ package model
|
||||
|
||||
// UploadRequest carries the form parameters of a bill upload request.
type UploadRequest struct {
	Type     string `form:"type"`     // Bill type: alipay/wechat (optional; detected automatically when omitted)
	Password string `form:"password"` // ZIP archive password (optional)
	Year     string `form:"year"`     // Year filter
	Month    string `form:"month"`    // Month filter
	Start    string `form:"start"`    // Start date
	End      string `form:"end"`      // End date
	Format   string `form:"format"`   // Output format: csv/json
}
|
||||
|
||||
159
server/service/archive.go
Normal file
159
server/service/archive.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package service
|
||||
|
||||
import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/yeka/zip"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)
|
||||
|
||||
// ExtractResult describes what was extracted from an archive.
type ExtractResult struct {
	// ExtractedFiles lists the paths of the extracted files.
	ExtractedFiles []string
	// BillFile is the path of the bill file (csv or xlsx).
	BillFile string
	// BillType is the detected bill type.
	BillType string
}
|
||||
|
||||
// ExtractZip 解压 ZIP 文件,支持密码
|
||||
// 返回解压后的账单文件路径
|
||||
func ExtractZip(zipPath, destDir, password string) (*ExtractResult, error) {
|
||||
reader, err := zip.OpenReader(zipPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("无法打开 ZIP 文件: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
result := &ExtractResult{
|
||||
ExtractedFiles: make([]string, 0),
|
||||
}
|
||||
|
||||
timestamp := time.Now().Format("20060102_150405")
|
||||
|
||||
for _, file := range reader.File {
|
||||
// 处理文件名编码(可能是 GBK)
|
||||
fileName := decodeFileName(file.Name)
|
||||
|
||||
// 安全检查:防止路径遍历
|
||||
if strings.Contains(fileName, "..") {
|
||||
continue
|
||||
}
|
||||
|
||||
// 获取文件扩展名
|
||||
ext := strings.ToLower(filepath.Ext(fileName))
|
||||
|
||||
// 生成安全的目标文件名(避免编码问题)
|
||||
// 使用时间戳+序号+扩展名的格式
|
||||
safeFileName := fmt.Sprintf("extracted_%s_%d%s", timestamp, len(result.ExtractedFiles), ext)
|
||||
destPath := filepath.Join(destDir, safeFileName)
|
||||
|
||||
if file.FileInfo().IsDir() {
|
||||
os.MkdirAll(destPath, 0755)
|
||||
continue
|
||||
}
|
||||
|
||||
// 确保目录存在
|
||||
if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
|
||||
return nil, fmt.Errorf("创建目录失败: %w", err)
|
||||
}
|
||||
|
||||
// 设置密码(如果有)
|
||||
if file.IsEncrypted() {
|
||||
if password == "" {
|
||||
return nil, fmt.Errorf("ZIP 文件已加密,请提供密码")
|
||||
}
|
||||
file.SetPassword(password)
|
||||
}
|
||||
|
||||
// 打开文件
|
||||
rc, err := file.Open()
|
||||
if err != nil {
|
||||
if file.IsEncrypted() {
|
||||
return nil, fmt.Errorf("密码错误或无法解密文件")
|
||||
}
|
||||
return nil, fmt.Errorf("无法读取文件 %s: %w", fileName, err)
|
||||
}
|
||||
|
||||
// 写入文件
|
||||
destFile, err := os.Create(destPath)
|
||||
if err != nil {
|
||||
rc.Close()
|
||||
return nil, fmt.Errorf("创建文件失败: %w", err)
|
||||
}
|
||||
|
||||
_, err = io.Copy(destFile, rc)
|
||||
rc.Close()
|
||||
destFile.Close()
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("写入文件失败: %w", err)
|
||||
}
|
||||
|
||||
result.ExtractedFiles = append(result.ExtractedFiles, destPath)
|
||||
|
||||
// 检测账单文件
|
||||
if ext == ".csv" || ext == ".xlsx" {
|
||||
result.BillFile = destPath
|
||||
|
||||
// 检测账单类型(从原始文件名检测)
|
||||
if strings.Contains(fileName, "支付宝") || strings.Contains(strings.ToLower(fileName), "alipay") {
|
||||
result.BillType = "alipay"
|
||||
} else if strings.Contains(fileName, "微信") || strings.Contains(strings.ToLower(fileName), "wechat") {
|
||||
result.BillType = "wechat"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if result.BillFile == "" {
|
||||
return nil, fmt.Errorf("ZIP 文件中未找到账单文件(.csv 或 .xlsx)")
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// decodeFileName 尝试将 GBK 编码的文件名转换为 UTF-8
|
||||
func decodeFileName(name string) string {
|
||||
// 如果文件名只包含 ASCII 字符,直接返回
|
||||
isAscii := true
|
||||
for i := 0; i < len(name); i++ {
|
||||
if name[i] > 127 {
|
||||
isAscii = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if isAscii {
|
||||
return name
|
||||
}
|
||||
|
||||
// 尝试 GBK 解码
|
||||
// Windows 上创建的 ZIP 文件通常使用 GBK 编码中文文件名
|
||||
decoded, _, err := transform.String(simplifiedchinese.GBK.NewDecoder(), name)
|
||||
if err == nil && len(decoded) > 0 {
|
||||
return decoded
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
// IsSupportedArchive reports whether filename names a supported archive
// format, i.e. whether it ends in ".zip" (case-insensitive).
func IsSupportedArchive(filename string) bool {
	const zipSuffix = ".zip"
	return strings.HasSuffix(strings.ToLower(filename), zipSuffix)
}
|
||||
|
||||
// IsBillFile reports whether filename names a bill file, i.e. whether it ends
// in ".csv" or ".xlsx" (case-insensitive).
func IsBillFile(filename string) bool {
	name := strings.ToLower(filename)
	for _, suffix := range []string{".csv", ".xlsx"} {
		if strings.HasSuffix(name, suffix) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// CleanupExtractedFiles removes the temporary files produced by extraction.
// Removal is best effort: errors (for example, a file that no longer exists)
// are ignored.
func CleanupExtractedFiles(files []string) {
	for i := range files {
		_ = os.Remove(files[i])
	}
}
|
||||
@@ -47,6 +47,7 @@ func DeduplicateRawFile(filePath, uploadBatch string) (*DeduplicateResult, error
|
||||
defer file.Close()
|
||||
|
||||
reader := csv.NewReader(file)
|
||||
reader.FieldsPerRecord = -1 // 允许变长记录
|
||||
rows, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("读取 CSV 失败: %w", err)
|
||||
@@ -183,6 +184,7 @@ func SaveRawBillsFromFile(filePath, billType, sourceFile, uploadBatch string) (i
|
||||
defer file.Close()
|
||||
|
||||
reader := csv.NewReader(file)
|
||||
reader.FieldsPerRecord = -1 // 允许变长记录
|
||||
rows, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("读取 CSV 失败: %w", err)
|
||||
@@ -249,6 +251,7 @@ func saveCleanedBillsFromCSV(filePath, billType, sourceFile, uploadBatch string)
|
||||
defer file.Close()
|
||||
|
||||
reader := csv.NewReader(file)
|
||||
reader.FieldsPerRecord = -1 // 允许变长记录
|
||||
rows, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("读取 CSV 失败: %w", err)
|
||||
|
||||
@@ -20,6 +20,13 @@ func RunCleanScript(inputPath, outputPath string, opts *CleanOptions) (*CleanRes
|
||||
return cleaner.Clean(inputPath, outputPath, opts)
|
||||
}
|
||||
|
||||
// ConvertBillFile 转换账单文件格式(xlsx -> csv,处理编码)
|
||||
// 返回转换后的文件路径和检测到的账单类型
|
||||
func ConvertBillFile(inputPath string) (outputPath string, billType string, err error) {
|
||||
cleaner := adapter.GetCleaner()
|
||||
return cleaner.Convert(inputPath)
|
||||
}
|
||||
|
||||
// DetectBillTypeFromOutput 从脚本输出中检测账单类型
|
||||
// 保留此函数以兼容其他调用
|
||||
func DetectBillTypeFromOutput(output string) string {
|
||||
|
||||
@@ -27,6 +27,7 @@ func extractFromCSV(filePath string) []model.ReviewRecord {
|
||||
defer file.Close()
|
||||
|
||||
reader := csv.NewReader(file)
|
||||
reader.FieldsPerRecord = -1 // 允许变长记录
|
||||
rows, err := reader.ReadAll()
|
||||
if err != nil || len(rows) < 2 {
|
||||
return records
|
||||
|
||||
Reference in New Issue
Block a user