Files
billai/server/handler/upload.go

204 lines
6.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package handler
import (
"crypto/rand"
"encoding/hex"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"time"
"github.com/gin-gonic/gin"
"billai-server/config"
"billai-server/model"
"billai-server/service"
)
// Upload 处理账单上传和清理请求
func Upload(c *gin.Context) {
// 1. 获取上传的文件
file, header, err := c.Request.FormFile("file")
if err != nil {
c.JSON(http.StatusBadRequest, model.UploadResponse{
Result: false,
Message: "请上传账单文件 (参数名: file)",
})
return
}
defer file.Close()
// 2. 解析请求参数
var req model.UploadRequest
c.ShouldBind(&req)
if req.Format == "" {
req.Format = "csv"
}
// 验证 type 参数
if req.Type == "" {
c.JSON(http.StatusBadRequest, model.UploadResponse{
Result: false,
Message: "请指定账单类型 (type: alipay 或 wechat)",
})
return
}
if req.Type != "alipay" && req.Type != "wechat" {
c.JSON(http.StatusBadRequest, model.UploadResponse{
Result: false,
Message: "账单类型无效,仅支持 alipay 或 wechat",
})
return
}
billType := req.Type
// 3. 保存上传的文件添加唯一ID避免覆盖
timestamp := time.Now().Format("20060102_150405")
uniqueID := generateShortID()
// 获取文件扩展名和基础名
ext := filepath.Ext(header.Filename)
baseName := header.Filename[:len(header.Filename)-len(ext)]
// 文件名格式: 时间戳_唯一ID_原始文件名
inputFileName := fmt.Sprintf("%s_%s_%s%s", timestamp, uniqueID, baseName, ext)
uploadDirAbs := config.ResolvePath(config.Global.UploadDir)
inputPath := filepath.Join(uploadDirAbs, inputFileName)
dst, err := os.Create(inputPath)
if err != nil {
c.JSON(http.StatusInternalServerError, model.UploadResponse{
Result: false,
Message: "保存文件失败: " + err.Error(),
})
return
}
defer dst.Close()
io.Copy(dst, file)
// 4. 对原始数据进行去重检查
fmt.Printf("📋 开始去重检查...\n")
dedupResult, dedupErr := service.DeduplicateRawFile(inputPath, timestamp)
if dedupErr != nil {
c.JSON(http.StatusInternalServerError, model.UploadResponse{
Result: false,
Message: "去重检查失败: " + dedupErr.Error(),
})
return
}
fmt.Printf(" 原始记录: %d 条\n", dedupResult.OriginalCount)
if dedupResult.DuplicateCount > 0 {
fmt.Printf(" 重复记录: %d 条(已跳过)\n", dedupResult.DuplicateCount)
}
fmt.Printf(" 新增记录: %d 条\n", dedupResult.NewCount)
// 如果全部重复,返回提示
if dedupResult.NewCount == 0 {
c.JSON(http.StatusOK, model.UploadResponse{
Result: true,
Message: fmt.Sprintf("文件中的 %d 条记录全部已存在,无需重复导入", dedupResult.OriginalCount),
Data: &model.UploadData{
BillType: billType,
RawCount: 0,
CleanedCount: 0,
DuplicateCount: dedupResult.DuplicateCount,
},
})
return
}
// 使用去重后的文件路径进行后续处理
processFilePath := dedupResult.DedupFilePath
// 5. 构建输出文件路径时间_type_编号
outputExt := ".csv"
if req.Format == "json" {
outputExt = ".json"
}
outputDirAbs := config.ResolvePath(config.Global.OutputDir)
fileSeq := generateFileSequence(outputDirAbs, timestamp, billType, outputExt)
outputFileName := fmt.Sprintf("%s_%s_%s%s", timestamp, billType, fileSeq, outputExt)
outputPath := filepath.Join(outputDirAbs, outputFileName)
// 6. 执行 Python 清洗脚本
cleanOpts := &service.CleanOptions{
Year: req.Year,
Month: req.Month,
Start: req.Start,
End: req.End,
Format: req.Format,
}
_, cleanErr := service.RunCleanScript(processFilePath, outputPath, cleanOpts)
if cleanErr != nil {
c.JSON(http.StatusInternalServerError, model.UploadResponse{
Result: false,
Message: cleanErr.Error(),
})
return
}
// 7. 将去重后的原始数据存入 MongoDB原始数据集合
rawCount, rawErr := service.SaveRawBillsFromFile(processFilePath, billType, header.Filename, timestamp)
if rawErr != nil {
fmt.Printf("⚠️ 存储原始数据到 MongoDB 失败: %v\n", rawErr)
} else {
fmt.Printf("✅ 已存储 %d 条原始账单记录到 MongoDB\n", rawCount)
}
// 9. 将清洗后的数据存入 MongoDB清洗后数据集合
cleanedCount, _, cleanedErr := service.SaveCleanedBillsFromFile(outputPath, req.Format, billType, header.Filename, timestamp)
if cleanedErr != nil {
fmt.Printf("⚠️ 存储清洗后数据到 MongoDB 失败: %v\n", cleanedErr)
} else {
fmt.Printf("✅ 已存储 %d 条清洗后账单记录到 MongoDB\n", cleanedCount)
}
// 10. 清理临时的去重文件(如果生成了的话)
if dedupResult.DedupFilePath != inputPath && dedupResult.DedupFilePath != "" {
os.Remove(dedupResult.DedupFilePath)
}
// 11. 返回成功响应
message := fmt.Sprintf("处理成功,新增 %d 条记录", cleanedCount)
if dedupResult.DuplicateCount > 0 {
message = fmt.Sprintf("处理成功,新增 %d 条,跳过 %d 条重复记录", cleanedCount, dedupResult.DuplicateCount)
}
c.JSON(http.StatusOK, model.UploadResponse{
Result: true,
Message: message,
Data: &model.UploadData{
BillType: billType,
FileURL: fmt.Sprintf("/download/%s", outputFileName),
FileName: outputFileName,
RawCount: rawCount,
CleanedCount: cleanedCount,
DuplicateCount: dedupResult.DuplicateCount,
},
})
}
// generateFileSequence 生成文件序号
// 根据当前目录下同一时间戳和类型的文件数量生成序号
func generateFileSequence(dir, timestamp, billType, ext string) string {
pattern := fmt.Sprintf("%s_%s_*%s", timestamp, billType, ext)
matches, err := filepath.Glob(filepath.Join(dir, pattern))
if err != nil || len(matches) == 0 {
return "001"
}
return fmt.Sprintf("%03d", len(matches)+1)
}
// generateShortID 生成 6 位随机唯一标识符
func generateShortID() string {
bytes := make([]byte, 3) // 3 字节 = 6 个十六进制字符
if _, err := rand.Read(bytes); err != nil {
// 如果随机数生成失败,使用时间纳秒作为备选
return fmt.Sprintf("%06x", time.Now().UnixNano()%0xFFFFFF)
}
return hex.EncodeToString(bytes)
}