15分钟学 Go 实战项目六:统计分析工具项目(30000字完整例子)

统计分析工具项目

1. 项目概述

| 功能模块 | 说明 | 难度 |
| --- | --- | --- |
| 数据收集 | CSV文件读取和解析 | ★★☆☆☆ |
| 数据分析 | 基本统计和高级分析 | ★★★☆☆ |
| 可视化 | 生成图表和报告 | ★★★★☆ |
| 导出功能 | 支持多种格式导出 | ★★☆☆☆ |
| Web界面 | 交互式数据分析 | ★★★★☆ |

2. 项目架构

3. 核心代码实现

3.1 数据模型定义

go 复制代码
// models/dataset.go
package models

import (
    "time"
)

// DataSet is an in-memory table: a list of column definitions plus the
// data rows, together with descriptive metadata and timestamps.
type DataSet struct {
    Name        string
    Description string
    Columns     []Column
    Rows        []Row
    CreatedAt   time.Time
    UpdatedAt   time.Time
}

// Column describes a single column of a DataSet: its name, its type tag
// and the statistics computed for it.
type Column struct {
    Name     string
    Type     string      // string, int, float, date, etc.
    Stats    Statistics
}

// Row is one data row; Values holds one entry per column, in column order.
type Row struct {
    Values []interface{}
}

// Statistics holds summary statistics for one numeric column.
// Percentiles is keyed by the percentile number (for example 25 or 99).
type Statistics struct {
    Count       int
    Mean        float64
    Median      float64
    Mode        float64
    StdDev      float64
    Min         float64
    Max         float64
    Percentiles map[int]float64
}

// AnalysisResult bundles the outputs of an analysis run over one data set:
// per-column statistics, pairwise correlations and trend series.
type AnalysisResult struct {
    DataSetName    string
    ColumnStats    map[string]Statistics
    Correlations   map[string]map[string]float64
    Trends         map[string][]float64
    GeneratedAt    time.Time
}

// ChartConfig describes a chart to render: its kind, labels, data and
// free-form rendering options.
type ChartConfig struct {
    Type      string // line, bar, scatter, etc.
    Title     string
    XAxis     string
    YAxis     string
    Data      map[string]interface{}
    Options   map[string]interface{}
}

// ReportTemplate is a named report layout: template content plus the
// charts to embed in it.
type ReportTemplate struct {
    Name        string
    Content     string
    Charts      []ChartConfig
    CreatedAt   time.Time
}

3.2 数据处理核心功能

go 复制代码
// services/processor.go
package services

import (
    "encoding/csv"
    "math"
    "os"
    "sort"
    "strconv"
    "time"

    "github.com/your/stats/models"
)

// DataProcessor loads a data set from CSV and computes per-column
// statistics on it. dataset is nil until LoadCSV succeeds.
type DataProcessor struct {
    dataset *models.DataSet
}

// NewDataProcessor returns a DataProcessor that has no data set loaded yet.
func NewDataProcessor() *DataProcessor {
    return new(DataProcessor)
}

// LoadCSV reads the CSV file at filename into a new data set, replaces the
// processor's current data set with it and infers every column's type.
//
// The first record is used as the header row. Every cell is stored as a
// string. An error is returned when the file cannot be opened, the header
// cannot be read, or any data record is malformed; a malformed record is
// reported instead of silently ending the load with a truncated data set.
func (p *DataProcessor) LoadCSV(filename string) error {
    file, err := os.Open(filename)
    if err != nil {
        return err
    }
    defer file.Close()

    reader := csv.NewReader(file)

    // The first record holds the column names.
    headers, err := reader.Read()
    if err != nil {
        return err
    }

    // Every column starts out as "string"; inferDataTypes refines this below.
    columns := make([]models.Column, len(headers))
    for i, header := range headers {
        columns[i] = models.Column{
            Name: header,
            Type: "string",
        }
    }

    // ReadAll treats end-of-file as success and returns every other error,
    // so a broken row is not mistaken for the end of the data.
    records, err := reader.ReadAll()
    if err != nil {
        return err
    }

    rows := make([]models.Row, 0, len(records))
    for _, record := range records {
        values := make([]interface{}, len(record))
        for i, v := range record {
            values[i] = v
        }
        rows = append(rows, models.Row{Values: values})
    }

    p.dataset = &models.DataSet{
        Name:    filename,
        Columns: columns,
        Rows:    rows,
    }

    return p.inferDataTypes()
}

// inferDataTypes sets the Type of every column of the loaded data set.
//
// A column becomes "float" when every cell parses as a float, "date" when
// every cell parses with the layout "2006-01-02", and "string" otherwise.
// A column with no rows stays "string": with no cells there is no evidence
// that it is numeric. Cells that are missing or are not strings count as
// non-matching instead of causing a panic. It always returns nil.
func (p *DataProcessor) inferDataTypes() error {
    for colIndex := range p.dataset.Columns {
        // Both candidates start as plausible only if there is data to check.
        isNumeric := len(p.dataset.Rows) > 0
        isDate := isNumeric

        for _, row := range p.dataset.Rows {
            if colIndex >= len(row.Values) {
                isNumeric, isDate = false, false
                break
            }
            value, ok := row.Values[colIndex].(string)
            if !ok {
                isNumeric, isDate = false, false
                break
            }

            if isNumeric {
                if _, err := strconv.ParseFloat(value, 64); err != nil {
                    isNumeric = false
                }
            }
            if isDate {
                if _, err := time.Parse("2006-01-02", value); err != nil {
                    isDate = false
                }
            }

            // Once both candidates are ruled out the remaining rows cannot
            // change the outcome.
            if !isNumeric && !isDate {
                break
            }
        }

        switch {
        case isNumeric:
            p.dataset.Columns[colIndex].Type = "float"
        case isDate:
            p.dataset.Columns[colIndex].Type = "date"
        default:
            p.dataset.Columns[colIndex].Type = "string"
        }
    }

    return nil
}

// CalculateStatistics fills in Stats for every column whose Type is
// "float". Cells that do not parse as a float are left out of the sample.
// Columns of any other type are not touched. It always returns nil.
func (p *DataProcessor) CalculateStatistics() error {
    columns := p.dataset.Columns
    for idx := range columns {
        if columns[idx].Type != "float" {
            continue
        }

        // Gather the parseable cells of this column.
        sample := make([]float64, 0, len(p.dataset.Rows))
        for _, row := range p.dataset.Rows {
            parsed, err := strconv.ParseFloat(row.Values[idx].(string), 64)
            if err != nil {
                continue
            }
            sample = append(sample, parsed)
        }

        columns[idx].Stats = models.Statistics{
            Count:       len(sample),
            Mean:        p.calculateMean(sample),
            Median:      p.calculateMedian(sample),
            Mode:        p.calculateMode(sample),
            StdDev:      p.calculateStdDev(sample),
            Min:         p.calculateMin(sample),
            Max:         p.calculateMax(sample),
            Percentiles: p.calculatePercentiles(sample),
        }
    }

    return nil
}

// calculateMean returns the arithmetic mean of values, or 0 when values is
// empty.
func (p *DataProcessor) calculateMean(values []float64) float64 {
    n := len(values)
    if n == 0 {
        return 0
    }

    var total float64
    for i := 0; i < n; i++ {
        total += values[i]
    }
    return total / float64(n)
}

// calculateMedian returns the median of values, or 0 when values is empty.
// For an even count it is the average of the two middle elements. The
// input slice is not modified; sorting happens on a copy.
func (p *DataProcessor) calculateMedian(values []float64) float64 {
    n := len(values)
    if n == 0 {
        return 0
    }

    ordered := append([]float64(nil), values...)
    sort.Float64s(ordered)

    mid := n / 2
    if n%2 == 1 {
        return ordered[mid]
    }
    return (ordered[mid-1] + ordered[mid]) / 2
}

// calculateStdDev returns the population standard deviation of values (the
// squared deviations are divided by the element count), or 0 when values
// is empty.
func (p *DataProcessor) calculateStdDev(values []float64) float64 {
    n := len(values)
    if n == 0 {
        return 0
    }

    avg := p.calculateMean(values)
    var squared float64
    for i := range values {
        squared += math.Pow(values[i]-avg, 2)
    }
    return math.Sqrt(squared / float64(n))
}

// calculatePercentiles returns the 25th, 50th, 75th, 90th, 95th and 99th
// percentiles of values, keyed by percentile number.
//
// Each percentile is the element of the sorted sample at index
// floor(pct/100 * (n-1)); no interpolation is done. An empty input yields
// an empty map instead of indexing into an empty slice. The input slice is
// not modified.
func (p *DataProcessor) calculatePercentiles(values []float64) map[int]float64 {
    percentiles := make(map[int]float64)
    if len(values) == 0 {
        return percentiles
    }

    sorted := make([]float64, len(values))
    copy(sorted, values)
    sort.Float64s(sorted)

    for _, pct := range []int{25, 50, 75, 90, 95, 99} {
        index := int(float64(pct) / 100 * float64(len(sorted)-1))
        percentiles[pct] = sorted[index]
    }

    return percentiles
}

3.3 分析功能实现

go 复制代码
// services/analyzer.go
package services

import (
    "math"
    "sort"
    "time"
    
    "github.com/your/stats/models"
)

// Analyzer runs analyses (correlation, trend, distribution, outliers) over
// a single data set.
type Analyzer struct {
    dataset *models.DataSet
}

// NewAnalyzer returns an Analyzer bound to dataset.
func NewAnalyzer(dataset *models.DataSet) *Analyzer {
    analyzer := new(Analyzer)
    analyzer.dataset = dataset
    return analyzer
}

// CalculateCorrelation returns the Pearson correlation coefficient between
// the numeric columns col1 and col2.
//
// Observations are paired by index. If the two columns differ in length,
// only the common prefix is used instead of indexing out of range. When
// the coefficient is undefined (no observations, or one of the columns has
// zero variance) the result is 0 rather than NaN, because encoding/json
// cannot serialize NaN. Errors from fetching the columns are returned
// unchanged.
func (a *Analyzer) CalculateCorrelation(col1, col2 string) (float64, error) {
    values1, values2, err := a.getNumericColumns(col1, col2)
    if err != nil {
        return 0, err
    }

    // Keep only the observations that exist in both columns.
    n := len(values1)
    if len(values2) < n {
        n = len(values2)
    }
    if n == 0 {
        return 0, nil
    }
    values1, values2 = values1[:n], values2[:n]

    mean1 := mean(values1)
    mean2 := mean(values2)

    var covariance, variance1, variance2 float64
    for i := range values1 {
        diff1 := values1[i] - mean1
        diff2 := values2[i] - mean2
        covariance += diff1 * diff2
        variance1 += diff1 * diff1
        variance2 += diff2 * diff2
    }

    denominator := math.Sqrt(variance1 * variance2)
    if denominator == 0 {
        // A constant column has no defined correlation.
        return 0, nil
    }
    return covariance / denominator, nil
}

// AnalyzeTrends returns a trailing moving average of valueCol ordered by
// timeCol.
//
// The points are sorted by time with a stable sort, so points that share a
// timestamp keep their input order and the result is deterministic. Each
// returned point carries the observation's time and value plus the mean of
// that observation and the four preceding it (window of 5). The first four
// observations produce no point, so fewer than five observations yield an
// empty, non-nil slice. If the time and value series differ in length only
// the common prefix is used.
func (a *Analyzer) AnalyzeTrends(timeCol, valueCol string) ([]models.TrendPoint, error) {
    dates, values, err := a.getTimeSeriesData(timeCol, valueCol)
    if err != nil {
        return nil, err
    }

    // Pair observations by index without indexing past the shorter series.
    n := len(dates)
    if len(values) < n {
        n = len(values)
    }

    type timeValue struct {
        time  time.Time
        value float64
    }

    combined := make([]timeValue, n)
    for i := range combined {
        combined[i] = timeValue{dates[i], values[i]}
    }

    sort.SliceStable(combined, func(i, j int) bool {
        return combined[i].time.Before(combined[j].time)
    })

    // Trailing moving average over the last windowSize observations.
    const windowSize = 5
    trends := make([]models.TrendPoint, 0, n)

    for i := windowSize - 1; i < len(combined); i++ {
        sum := 0.0
        for j := 0; j < windowSize; j++ {
            sum += combined[i-j].value
        }
        avg := sum / float64(windowSize)

        trends = append(trends, models.TrendPoint{
            Time:  combined[i].time,
            Value: combined[i].value,
            Trend: avg,
        })
    }

    return trends, nil
}

// CalculateDistribution builds a histogram and summary statistics for the
// named column.
//
// The number of bins is floor(sqrt(n)) (square-root rule) and the bins
// split [min, max] evenly. Quantiles 0.25, 0.5 and 0.75 are taken from the
// sorted sample at index floor((n-1)*q). A column with no values yields an
// empty distribution (no bins, no quantiles) instead of a panic, and a
// column whose values are all equal (zero-width bins) has every value
// counted in the first bin.
//
// NOTE(review): Quantiles is created as map[float64]float64; encoding/json
// does not accept float map keys, so confirm how this value is serialized
// before returning it through c.JSON.
func (a *Analyzer) CalculateDistribution(column string) (*models.Distribution, error) {
    values, err := a.getColumnValues(column)
    if err != nil {
        return nil, err
    }

    // With no data there is nothing to bin or rank.
    if len(values) == 0 {
        return &models.Distribution{
            Bins:      []models.HistogramBin{},
            Quantiles: make(map[float64]float64),
        }, nil
    }

    lo, hi := minMax(values)
    binCount := int(math.Sqrt(float64(len(values)))) // square-root rule; >= 1 here

    binSize := (hi - lo) / float64(binCount)
    bins := make([]models.HistogramBin, binCount)

    for i := range bins {
        bins[i] = models.HistogramBin{
            Start: lo + float64(i)*binSize,
            End:   lo + float64(i+1)*binSize,
            Count: 0,
        }
    }

    // Count the data points that fall into each bin.
    for _, v := range values {
        binIndex := 0
        if binSize > 0 {
            // binSize is 0 when every value is equal; dividing by it would
            // produce NaN and an invalid index.
            binIndex = int((v - lo) / binSize)
        }
        if binIndex >= binCount {
            // The maximum lands exactly on the upper edge of the last bin.
            binIndex = binCount - 1
        }
        if binIndex < 0 {
            binIndex = 0
        }
        bins[binIndex].Count++
    }

    sorted := make([]float64, len(values))
    copy(sorted, values)
    sort.Float64s(sorted)

    distribution := &models.Distribution{
        Bins:      bins,
        Mean:      mean(values),
        Median:    median(sorted),
        StdDev:    stdDev(values),
        Quantiles: make(map[float64]float64),
        Skewness:  skewness(values),
        Kurtosis:  kurtosis(values),
    }

    // Quartiles.
    for _, q := range []float64{0.25, 0.5, 0.75} {
        idx := int(float64(len(sorted)-1) * q)
        distribution.Quantiles[q] = sorted[idx]
    }

    return distribution, nil
}

// PerformOutlierAnalysis flags the values of the named column that fall
// outside the interval [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
//
// Q1 and Q3 are the sorted sample's elements at index floor(n*0.25) and
// floor(n*0.75). Each outlier records its value, its index in the original
// column order, its z-score and its distance from Q1 in IQR units. A score
// whose divisor (standard deviation or IQR) is zero is reported as 0
// rather than Inf/NaN. A column with no values yields an empty analysis
// instead of a panic.
func (a *Analyzer) PerformOutlierAnalysis(column string) (*models.OutlierAnalysis, error) {
    values, err := a.getColumnValues(column)
    if err != nil {
        return nil, err
    }

    // With no data there are no quartiles and no outliers.
    if len(values) == 0 {
        return &models.OutlierAnalysis{
            Outliers: []models.Outlier{},
        }, nil
    }

    // Interquartile range.
    sorted := make([]float64, len(values))
    copy(sorted, values)
    sort.Float64s(sorted)

    q1 := sorted[int(float64(len(sorted))*0.25)]
    q3 := sorted[int(float64(len(sorted))*0.75)]
    iqr := q3 - q1

    lowerBound := q1 - 1.5*iqr
    upperBound := q3 + 1.5*iqr

    // Both are loop-invariant, so compute them once rather than per outlier.
    avg := mean(values)
    sd := stdDev(values)

    outliers := make([]models.Outlier, 0)
    for i, v := range values {
        if v < lowerBound || v > upperBound {
            outlier := models.Outlier{
                Value: v,
                Index: i,
            }
            if sd != 0 {
                outlier.ZScore = (v - avg) / sd
            }
            if iqr != 0 {
                outlier.IQRScore = (v - q1) / iqr
            }
            outliers = append(outliers, outlier)
        }
    }

    return &models.OutlierAnalysis{
        Q1:           q1,
        Q3:           q3,
        IQR:          iqr,
        LowerBound:   lowerBound,
        UpperBound:   upperBound,
        OutlierCount: len(outliers),
        Outliers:     outliers,
    }, nil
}

// Helper functions.

// getNumericColumns is meant to return the numeric values of col1 and col2.
// NOTE(review): placeholder — it currently ignores its arguments and
// returns nil slices with a nil error.
func (a *Analyzer) getNumericColumns(col1, col2 string) ([]float64, []float64, error) {
    // TODO: implement fetching the numeric columns.
    return nil, nil, nil
}

// getTimeSeriesData is meant to return the timestamps of timeCol and the
// values of valueCol.
// NOTE(review): placeholder — it currently ignores its arguments and
// returns nil slices with a nil error.
func (a *Analyzer) getTimeSeriesData(timeCol, valueCol string) ([]time.Time, []float64, error) {
    // TODO: implement fetching the time-series data.
    return nil, nil, nil
}

// getColumnValues is meant to return the numeric values of the named column.
// NOTE(review): placeholder — it currently ignores its argument and
// returns a nil slice with a nil error.
func (a *Analyzer) getColumnValues(column string) ([]float64, error) {
    // TODO: implement fetching the column values.
    return nil, nil
}

3.4 报告生成功能

go 复制代码
// services/report.go
package services

import (
    "bytes"
    "encoding/json"
    "html/template"
    "time"
    
    "github.com/jung-kurt/gofpdf"
    "github.com/xuri/excelize/v2"
    "github.com/your/stats/models"
)

// ReportGenerator renders a data set and its analysis result as HTML, PDF
// or Excel reports.
type ReportGenerator struct {
    dataset *models.DataSet
    analysis *models.AnalysisResult
}

// NewReportGenerator returns a ReportGenerator for the given data set and
// analysis result.
func NewReportGenerator(dataset *models.DataSet, analysis *models.AnalysisResult) *ReportGenerator {
    generator := new(ReportGenerator)
    generator.dataset = dataset
    generator.analysis = analysis
    return generator
}

// GenerateHTMLReport renders the report as a standalone HTML page and
// returns it as a string.
//
// The page contains a data set overview, a per-column statistics table and
// two chart containers that the embedded script fills from the
// CorrelationData and TrendData values. It returns an error when the
// template fails to parse or execute.
//
// NOTE(review): prepareCorrelationData and prepareTrendData currently
// return nil, so the embedded script receives no chart data — confirm the
// page still renders as intended until they are implemented.
func (r *ReportGenerator) GenerateHTMLReport() (string, error) {
    const reportTemplate = `
    <!DOCTYPE html>
    <html>
    <head>
        <title>数据分析报告</title>
        <style>
            body { font-family: Arial, sans-serif; }
            .header { text-align: center; margin: 20px 0; }
            .section { margin: 20px 0; }
            .table { width: 100%; border-collapse: collapse; }
            .table th, .table td { border: 1px solid #ddd; padding: 8px; }
            .chart { margin: 20px 0; }
        </style>
        <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    </head>
    <body>
        <div class="header">
            <h1>数据分析报告</h1>
            <p>生成时间: {{.GeneratedAt}}</p>
        </div>

        <div class="section">
            <h2>数据集概览</h2>
            <p>数据集名称: {{.DataSetName}}</p>
            <p>记录数: {{.RowCount}}</p>
            <p>列数: {{.ColumnCount}}</p>
        </div>

        <div class="section">
            <h2>统计摘要</h2>
            <table class="table">
                <tr>
                    <th>列名</th>
                    <th>类型</th>
                    <th>均值</th>
                    <th>中位数</th>
                    <th>标准差</th>
                </tr>
                {{range .Columns}}
                <tr>
                    <td>{{.Name}}</td>
                    <td>{{.Type}}</td>
                    <td>{{printf "%.2f" .Stats.Mean}}</td>
                    <td>{{printf "%.2f" .Stats.Median}}</td>
                    <td>{{printf "%.2f" .Stats.StdDev}}</td>
                </tr>
                {{end}}
            </table>
        </div>

        <div class="section">
            <h2>相关性分析</h2>
            <div id="correlationHeatmap" class="chart"></div>
        </div>

        <div class="section">
            <h2>趋势分析</h2>
            <div id="trendChart" class="chart"></div>
        </div>

        <script>
            // 绘制相关性热图
            var correlationData = {{.CorrelationData}};
            Plotly.newPlot('correlationHeatmap', [{
                z: correlationData.values,
                x: correlationData.columns,
                y: correlationData.columns,
                type: 'heatmap',
                colorscale: 'Viridis'
            }]);

            // 绘制趋势图
            var trendData = {{.TrendData}};
            Plotly.newPlot('trendChart', [{
                x: trendData.dates,
                y: trendData.values,
                type: 'scatter',
                mode: 'lines+markers',
                name: '实际值'
            }, {
                x: trendData.dates,
                y: trendData.trend,
                type: 'scatter',
                mode: 'lines',
                name: '趋势'
            }]);
        </script>
    </body>
    </html>
    `

    // The template is parsed on every call.
    tmpl, err := template.New("report").Parse(reportTemplate)
    if err != nil {
        return "", err
    }

    // View model for the template; field names match the {{.X}} references.
    data := struct {
        GeneratedAt     string
        DataSetName     string
        RowCount        int
        ColumnCount     int
        Columns         []models.Column
        CorrelationData map[string]interface{}
        TrendData       map[string]interface{}
    }{
        GeneratedAt: time.Now().Format("2006-01-02 15:04:05"),
        DataSetName: r.dataset.Name,
        RowCount:    len(r.dataset.Rows),
        ColumnCount: len(r.dataset.Columns),
        Columns:     r.dataset.Columns,
        CorrelationData: r.prepareCorrelationData(),
        TrendData:       r.prepareTrendData(),
    }

    // Render into memory so a failed execution returns no partial page.
    var buf bytes.Buffer
    if err := tmpl.Execute(&buf, data); err != nil {
        return "", err
    }

    return buf.String(), nil
}

// GeneratePDFReport lays the report out on a single A4 portrait page and
// returns the in-memory PDF document: a title, the data set name, the
// generation time and a five-column statistics table with one row per
// column of the data set. The returned error is always nil.
//
// NOTE(review): this function uses fmt.Sprintf, but "fmt" is not in the
// import block shown for this file — confirm the import exists.
// NOTE(review): the text is Chinese while the selected font is "Arial";
// verify that the glyphs render with this font.
func (r *ReportGenerator) GeneratePDFReport() (*gofpdf.Fpdf, error) {
    doc := gofpdf.New("P", "mm", "A4", "")
    doc.AddPage()

    // Title.
    doc.SetFont("Arial", "B", 16)
    doc.Cell(190, 10, "数据分析报告")
    doc.Ln(15)

    // Basic information.
    doc.SetFont("Arial", "", 12)
    doc.Cell(190, 8, "数据集: "+r.dataset.Name)
    doc.Ln(10)
    generatedAt := time.Now().Format("2006-01-02 15:04:05")
    doc.Cell(190, 8, "生成时间: "+generatedAt)
    doc.Ln(15)

    // Statistics table heading.
    doc.SetFont("Arial", "B", 12)
    doc.Cell(190, 10, "统计摘要")
    doc.Ln(10)

    // Table header row: five cells of equal width.
    for _, title := range []string{"列名", "类型", "均值", "中位数", "标准差"} {
        doc.Cell(38, 10, title)
    }
    doc.Ln(10)

    // Table body: one line per data set column.
    doc.SetFont("Arial", "", 10)
    for i := range r.dataset.Columns {
        column := &r.dataset.Columns[i]
        cells := []string{
            column.Name,
            column.Type,
            fmt.Sprintf("%.2f", column.Stats.Mean),
            fmt.Sprintf("%.2f", column.Stats.Median),
            fmt.Sprintf("%.2f", column.Stats.StdDev),
        }
        for _, text := range cells {
            doc.Cell(38, 8, text)
        }
        doc.Ln(8)
    }

    return doc, nil
}

// GenerateExcelReport builds an Excel workbook with two sheets and returns
// it: "概览" holds the report title, data set name and generation time,
// and "统计摘要" holds a header row followed by one statistics row per
// column of the data set.
//
// Cell references are produced with excelize.CoordinatesToCellName, so the
// layout does not depend on there being at most 26 columns. The first
// error hit while writing a cell is returned and no workbook is returned
// in that case; previously every write error was ignored.
func (r *ReportGenerator) GenerateExcelReport() (*excelize.File, error) {
    f := excelize.NewFile()

    // set writes one cell addressed by 1-based column and row. After the
    // first failure it becomes a no-op so the error can be checked once.
    var firstErr error
    set := func(sheet string, col, row int, value interface{}) {
        if firstErr != nil {
            return
        }
        cell, err := excelize.CoordinatesToCellName(col, row)
        if err != nil {
            firstErr = err
            return
        }
        firstErr = f.SetCellValue(sheet, cell, value)
    }

    // Overview sheet.
    overview := "概览"
    f.NewSheet(overview)
    set(overview, 1, 1, "数据分析报告")
    set(overview, 1, 2, "数据集名称")
    set(overview, 2, 2, r.dataset.Name)
    set(overview, 1, 3, "生成时间")
    set(overview, 2, 3, time.Now().Format("2006-01-02 15:04:05"))

    // Statistics summary sheet: header row first.
    summary := "统计摘要"
    f.NewSheet(summary)
    headers := []string{"列名", "类型", "均值", "中位数", "标准差", "最小值", "最大值"}
    for i, header := range headers {
        set(summary, i+1, 1, header)
    }

    // One row per data set column, starting below the header.
    for i, col := range r.dataset.Columns {
        row := i + 2
        set(summary, 1, row, col.Name)
        set(summary, 2, row, col.Type)
        set(summary, 3, row, col.Stats.Mean)
        set(summary, 4, row, col.Stats.Median)
        set(summary, 5, row, col.Stats.StdDev)
        set(summary, 6, row, col.Stats.Min)
        set(summary, 7, row, col.Stats.Max)
    }

    if firstErr != nil {
        return nil, firstErr
    }
    return f, nil
}

// prepareCorrelationData is meant to build the correlation data passed to
// the HTML report template.
// NOTE(review): placeholder — it currently returns nil.
func (r *ReportGenerator) prepareCorrelationData() map[string]interface{} {
    // TODO: implement preparing the correlation data.
    return nil
}

// prepareTrendData is meant to build the trend data passed to the HTML
// report template.
// NOTE(review): placeholder — it currently returns nil.
func (r *ReportGenerator) prepareTrendData() map[string]interface{} {
    // TODO: implement preparing the trend data.
    return nil
}

3.5 Web界面实现

go 复制代码
// handlers/web.go
package handlers

import (
    "encoding/json"
    "net/http"
    "path/filepath"
    
    "github.com/gin-gonic/gin"
    "github.com/your/stats/services"
    "github.com/your/stats/models"
)

// WebHandler serves the web pages and the JSON API. It holds the data
// processor plus the analyzer and report generator, which are assigned by
// the upload and report handlers respectively.
// NOTE(review): these fields are shared by all requests and are written by
// handlers without synchronization — confirm single-user use is intended.
type WebHandler struct {
    processor *services.DataProcessor
    analyzer  *services.Analyzer
    reporter  *services.ReportGenerator
}

// NewWebHandler returns a WebHandler with a fresh data processor. The
// analyzer and reporter stay nil until a handler assigns them.
func NewWebHandler() *WebHandler {
    handler := new(WebHandler)
    handler.processor = services.NewDataProcessor()
    return handler
}

// SetupRoutes registers the static file route, the HTML templates, the
// page routes and the /api routes on r.
func (h *WebHandler) SetupRoutes(r *gin.Engine) {
    // Static assets and HTML templates.
    r.Static("/static", "./static")
    r.LoadHTMLGlob("templates/*")

    // Page routes, registered in the listed order.
    pages := []struct {
        path    string
        handler gin.HandlerFunc
    }{
        {"/", h.handleHome},
        {"/upload", h.handleUploadPage},
        {"/analyze", h.handleAnalyzePage},
        {"/report", h.handleReportPage},
    }
    for _, page := range pages {
        r.GET(page.path, page.handler)
    }

    // JSON API routes.
    api := r.Group("/api")
    api.POST("/upload", h.handleFileUpload)
    api.GET("/columns", h.handleGetColumns)
    api.POST("/analyze", h.handleAnalyze)
    api.GET("/stats/:column", h.handleColumnStats)
    api.POST("/report", h.handleGenerateReport)
    api.GET("/download/:format", h.handleDownloadReport)
}

// handleHome renders the index.html template with the page title.
func (h *WebHandler) handleHome(c *gin.Context) {
    pageData := gin.H{"title": "统计分析工具"}
    c.HTML(http.StatusOK, "index.html", pageData)
}

// handleFileUpload accepts a CSV file from the multipart form field "file",
// stores it under the uploads directory, loads it into the data processor
// and creates the analyzer for the loaded data set.
//
// The client-supplied filename is reduced to its base name before it is
// joined into the storage path, so a name containing directory components
// cannot place the file outside the uploads directory.
//
// Responses: 400 when no file is sent or the extension is not ".csv", 500
// when saving or parsing fails, and 200 with the detected columns otherwise.
func (h *WebHandler) handleFileUpload(c *gin.Context) {
    file, err := c.FormFile("file")
    if err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "文件上传失败"})
        return
    }

    // Never trust the uploaded filename: drop any directory components.
    name := filepath.Base(file.Filename)

    // Only CSV files are accepted.
    if filepath.Ext(name) != ".csv" {
        c.JSON(http.StatusBadRequest, gin.H{"error": "仅支持CSV文件"})
        return
    }

    // Store the upload.
    filename := filepath.Join("uploads", name)
    if err := c.SaveUploadedFile(file, filename); err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "文件保存失败"})
        return
    }

    // Load and process the stored file.
    if err := h.processor.LoadCSV(filename); err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "文件处理失败"})
        return
    }

    // The analyzer works on the data set that was just loaded.
    dataset := h.processor.GetDataSet()
    h.analyzer = services.NewAnalyzer(dataset)

    c.JSON(http.StatusOK, gin.H{
        "message": "文件上传成功",
        "columns": dataset.Columns,
    })
}

// handleAnalyze runs the requested analyses and returns them as JSON.
//
// The request body carries two parallel arrays: types[i] is the analysis
// to run on columns[i] ("distribution", "outliers" or "trend"; other
// values are ignored). Each result is stored under the key
// "<column>_<type>". Analyses that fail are left out of the results and
// their messages are reported under the "errors" key, which is present
// only when at least one analysis failed.
//
// Responses: 400 when the body is invalid, when no file has been uploaded
// yet (there is no analyzer), or when types has fewer entries than
// columns; 200 otherwise.
func (h *WebHandler) handleAnalyze(c *gin.Context) {
    var req struct {
        Columns []string `json:"columns"`
        Types   []string `json:"types"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "无效的请求参数"})
        return
    }

    // The analyzer only exists after a successful upload.
    if h.analyzer == nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "请先上传数据文件"})
        return
    }

    // Every column needs a matching type; otherwise req.Types[i] below
    // would index out of range.
    if len(req.Types) < len(req.Columns) {
        c.JSON(http.StatusBadRequest, gin.H{"error": "无效的请求参数"})
        return
    }

    results := make(map[string]interface{})
    failures := make(map[string]string)

    for i, col := range req.Columns {
        switch req.Types[i] {
        case "distribution":
            dist, err := h.analyzer.CalculateDistribution(col)
            if err != nil {
                failures[col+"_distribution"] = err.Error()
                continue
            }
            results[col+"_distribution"] = dist

        case "outliers":
            outliers, err := h.analyzer.PerformOutlierAnalysis(col)
            if err != nil {
                failures[col+"_outliers"] = err.Error()
                continue
            }
            results[col+"_outliers"] = outliers

        case "trend":
            // The time axis is always read from the column named "date".
            trends, err := h.analyzer.AnalyzeTrends("date", col)
            if err != nil {
                failures[col+"_trend"] = err.Error()
                continue
            }
            results[col+"_trend"] = trends
        }
    }

    if len(failures) > 0 {
        results["errors"] = failures
    }

    c.JSON(http.StatusOK, results)
}

// handleGenerateReport generates a report in the requested format ("html",
// "pdf" or "excel") and returns it in the "result" field of a JSON
// response.
//
// Responses: 400 when the body is invalid, when no data has been uploaded
// yet (no analyzer or no data set), or when the format is unsupported; 500
// when generation fails; 200 otherwise.
//
// NOTE(review): for "pdf" and "excel" the result is the in-memory document
// object returned by the generator, which is placed into the JSON response
// as-is — confirm this yields a usable file for the client.
func (h *WebHandler) handleGenerateReport(c *gin.Context) {
    var req struct {
        Format string   `json:"format"`
        Charts []string `json:"charts"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "无效的请求参数"})
        return
    }

    // A report needs loaded data; before the first upload there is neither
    // an analyzer nor a data set, and the generators would dereference nil.
    dataset := h.processor.GetDataSet()
    if h.analyzer == nil || dataset == nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "请先上传数据文件"})
        return
    }

    h.reporter = services.NewReportGenerator(
        dataset,
        h.analyzer.GetAnalysisResult(),
    )

    var result interface{}
    var err error

    switch req.Format {
    case "html":
        result, err = h.reporter.GenerateHTMLReport()
    case "pdf":
        result, err = h.reporter.GeneratePDFReport()
    case "excel":
        result, err = h.reporter.GenerateExcelReport()
    default:
        c.JSON(http.StatusBadRequest, gin.H{"error": "不支持的报告格式"})
        return
    }

    if err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "报告生成失败"})
        return
    }

    c.JSON(http.StatusOK, gin.H{
        "message": "报告生成成功",
        "result":  result,
    })
}

// handleColumnStats returns the statistics of the column named by the
// ":column" path parameter as JSON.
//
// Responses: 400 when no file has been uploaded yet (there is no
// analyzer), 500 when the statistics cannot be obtained, 200 otherwise.
func (h *WebHandler) handleColumnStats(c *gin.Context) {
    // The analyzer only exists after a successful upload.
    if h.analyzer == nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "请先上传数据文件"})
        return
    }

    column := c.Param("column")
    stats, err := h.analyzer.GetColumnStats(column)
    if err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "统计信息获取失败"})
        return
    }

    c.JSON(http.StatusOK, stats)
}

3.6 前端界面实现

html 复制代码
// templates/index.html
<!DOCTYPE html>
<html>
<head>
    <title>统计分析工具</title>
    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
</head>
<body class="bg-gray-100">
    <div class="container mx-auto px-4 py-8">
        <!-- Header -->
        <header class="bg-white shadow rounded-lg p-6 mb-8">
            <h1 class="text-3xl font-bold text-gray-800">统计分析工具</h1>
            <p class="text-gray-600 mt-2">上传数据文件,进行分析并生成报告</p>
        </header>

        <!-- File upload area -->
        <div class="bg-white shadow rounded-lg p-6 mb-8">
            <h2 class="text-xl font-semibold mb-4">数据文件上传</h2>
            <div class="border-dashed border-2 border-gray-300 rounded-lg p-6 text-center">
                <input type="file" id="fileInput" class="hidden" accept=".csv">
                <label for="fileInput" class="cursor-pointer">
                    <div class="text-gray-600">
                        <p>点击或拖拽文件到此处</p>
                        <p class="text-sm mt-1">支持 CSV 格式文件</p>
                    </div>
                </label>
            </div>
        </div>

        <!-- Data analysis area (hidden until a file has been uploaded) -->
        <div class="bg-white shadow rounded-lg p-6 mb-8" id="analysisSection" style="display: none;">
            <h2 class="text-xl font-semibold mb-4">数据分析</h2>
            
            <!-- Column selection -->
            <div class="mb-6">
                <h3 class="font-medium mb-2">选择要分析的列</h3>
                <div id="columnSelect" class="grid grid-cols-3 gap-4">
                    <!-- Column options are added dynamically by JavaScript -->
                </div>
            </div>

            <!-- Analysis type selection -->
            <div class="mb-6">
                <h3 class="font-medium mb-2">选择分析类型</h3>
                <div class="grid grid-cols-3 gap-4">
                    <label class="flex items-center space-x-2">
                        <input type="checkbox" class="form-checkbox" value="distribution">
                        <span>分布分析</span>
                    </label>
                    <label class="flex items-center space-x-2">
                        <input type="checkbox" class="form-checkbox" value="correlation">
                        <span>相关性分析</span>
                    </label>
                    <label class="flex items-center space-x-2">
                        <input type="checkbox" class="form-checkbox" value="trend">
                        <span>趋势分析</span>
                    </label>
                </div>
            </div>

            <button id="analyzeBtn" class="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600">
                开始分析
            </button>
        </div>

        <!-- Analysis results area (hidden until an analysis has run) -->
        <div class="bg-white shadow rounded-lg p-6 mb-8" id="resultsSection" style="display: none;">
            <h2 class="text-xl font-semibold mb-4">分析结果</h2>
            
            <!-- Basic statistics -->
            <div class="mb-6">
                <h3 class="font-medium mb-2">基础统计信息</h3>
                <div id="basicStats" class="overflow-x-auto">
                    <!-- The statistics table is added dynamically by JavaScript -->
                </div>
            </div>

            <!-- Charts -->
            <div class="grid grid-cols-2 gap-6">
                <div>
                    <h3 class="font-medium mb-2">分布图</h3>
                    <div id="distributionChart" class="h-64"></div>
                </div>
                <div>
                    <h3 class="font-medium mb-2">趋势图</h3>
                    <div id="trendChart" class="h-64"></div>
                </div>
                <div class="col-span-2">
                    <h3 class="font-medium mb-2">相关性热图</h3>
                    <div id="correlationChart" class="h-96"></div>
                </div>
            </div>
        </div>

        <!-- Report generation area (hidden until an analysis has run) -->
        <div class="bg-white shadow rounded-lg p-6" id="reportSection" style="display: none;">
            <h2 class="text-xl font-semibold mb-4">生成报告</h2>
            
            <div class="grid grid-cols-3 gap-4 mb-6">
                <button class="bg-green-500 text-white px-4 py-2 rounded hover:bg-green-600"
                        onclick="generateReport('html')">
                    生成HTML报告
                </button>
                <button class="bg-red-500 text-white px-4 py-2 rounded hover:bg-red-600"
                        onclick="generateReport('pdf')">
                    生成PDF报告
                </button>
                <button class="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600"
                        onclick="generateReport('excel')">
                    生成Excel报告
                </button>
            </div>
        </div>
    </div>

    <script>
        // Front-end interaction logic goes here
    </script>
</body>
</html>

3.7 前端逻辑实现

javascript 复制代码
// static/js/main.js

// Global page state shared by the functions in this file:
//   columns         - column descriptors returned by the upload API
//   currentData     - not assigned anywhere in this part of the file
//   analysisResults - response body of the most recent analysis request
const state = {
    columns: [],
    currentData: null,
    analysisResults: null
};

// Wire up the upload area and the analysis controls once the DOM is ready.
document.addEventListener('DOMContentLoaded', function onReady() {
    initializeFileUpload();
    initializeAnalysisControls();
});

// Set up file upload through both drag-and-drop and the file picker.
//
// Both entry points apply the same check: only files whose name ends with
// ".csv" are uploaded, anything else shows an error. The file picker path
// previously skipped this check and uploaded whatever was chosen.
function initializeFileUpload() {
    const fileInput = document.getElementById('fileInput');
    const dropZone = document.querySelector('.border-dashed');

    // Upload the file if it looks like a CSV, otherwise tell the user.
    const submitIfCsv = (file) => {
        if (file && file.name.endsWith('.csv')) {
            handleFileUpload(file);
        } else {
            showError('请上传CSV文件');
        }
    };

    // Drag-and-drop: highlight the drop zone while a file is dragged over it.
    dropZone.addEventListener('dragover', (e) => {
        // preventDefault is required for the browser to allow a drop.
        e.preventDefault();
        dropZone.classList.add('border-blue-500');
    });

    dropZone.addEventListener('dragleave', () => {
        dropZone.classList.remove('border-blue-500');
    });

    dropZone.addEventListener('drop', (e) => {
        e.preventDefault();
        dropZone.classList.remove('border-blue-500');

        submitIfCsv(e.dataTransfer.files[0]);
    });

    // File picker: nothing to do when the dialog was cancelled.
    fileInput.addEventListener('change', (e) => {
        const file = e.target.files[0];
        if (file) {
            submitIfCsv(file);
        }
    });
}

// Upload a file to /api/upload as multipart form data.
//
// On success the returned column list is stored in state.columns, the
// analysis section is shown and the column selector is rebuilt. On failure
// the error text sent by the server (the "error" field of the response
// body) is shown when available, falling back to the generic request
// error message.
async function handleFileUpload(file) {
    const formData = new FormData();
    formData.append('file', file);

    try {
        const response = await axios.post('/api/upload', formData);
        // Fall back to an empty list so the selector can always be rebuilt.
        state.columns = response.data.columns || [];

        // Reveal the analysis section.
        document.getElementById('analysisSection').style.display = 'block';

        // Rebuild the column selector from the new column list.
        updateColumnSelect();

        showSuccess('文件上传成功');
    } catch (error) {
        // Prefer the server's explanation over "Request failed with status code ...".
        const serverMessage = error.response && error.response.data && error.response.data.error;
        showError('文件上传失败:' + (serverMessage || error.message));
    }
}

// Rebuild the column selector: one labelled checkbox per entry of
// state.columns, showing "<name> (<type>)".
//
// Column names come from the uploaded file's header row, so they are
// inserted through DOM properties (value / textContent) rather than being
// concatenated into an HTML string; markup in a column name is displayed
// as text instead of being interpreted.
function updateColumnSelect() {
    const columnSelect = document.getElementById('columnSelect');
    const fragment = document.createDocumentFragment();

    state.columns.forEach((column) => {
        const label = document.createElement('label');
        label.className = 'flex items-center space-x-2';

        const checkbox = document.createElement('input');
        checkbox.type = 'checkbox';
        checkbox.className = 'form-checkbox';
        checkbox.value = column.Name;

        const caption = document.createElement('span');
        caption.textContent = `${column.Name} (${column.Type})`;

        label.appendChild(checkbox);
        label.appendChild(caption);
        fragment.appendChild(label);
    });

    // Drop the previous options, then insert the new ones in one step.
    columnSelect.innerHTML = '';
    columnSelect.appendChild(fragment);
}

// Run performAnalysis when the "analyze" button is clicked.
function initializeAnalysisControls() {
    document.getElementById('analyzeBtn').addEventListener('click', performAnalysis);
}

// Send the selected columns and analysis types to /api/analyze and show
// the results.
//
// The API pairs the two arrays by index (types[i] is run on columns[i]),
// so one (column, type) pair is sent for every combination of a selected
// column and a selected analysis type. Analysis types are read only from
// checkboxes outside #columnSelect; the column checkboxes also carry a
// value attribute and must not be mistaken for analysis types.
async function performAnalysis() {
    const selectedColumns = Array.from(document.querySelectorAll('#columnSelect input:checked'))
        .map(input => input.value);

    const selectedTypes = Array.from(document.querySelectorAll('#analysisSection input[type="checkbox"]:checked'))
        .filter(input => !input.closest('#columnSelect'))
        .map(input => input.value);

    if (selectedColumns.length === 0) {
        showError('请选择要分析的列');
        return;
    }

    if (selectedTypes.length === 0) {
        showError('请选择分析类型');
        return;
    }

    // Expand the selection into index-aligned arrays.
    const columns = [];
    const types = [];
    selectedColumns.forEach((column) => {
        selectedTypes.forEach((type) => {
            columns.push(column);
            types.push(type);
        });
    });

    try {
        const response = await axios.post('/api/analyze', {
            columns,
            types
        });

        state.analysisResults = response.data;

        // Reveal the results and report sections.
        document.getElementById('resultsSection').style.display = 'block';
        document.getElementById('reportSection').style.display = 'block';

        // Refresh the charts and the statistics table.
        updateResults();

        showSuccess('分析完成');
    } catch (error) {
        showError('分析失败:' + error.message);
    }
}

// Refresh every result view, in order: statistics table, distribution
// chart, trend chart, correlation heatmap.
function updateResults() {
    const renderers = [
        updateBasicStats,
        updateDistributionChart,
        updateTrendChart,
        updateCorrelationChart
    ];
    for (const render of renderers) {
        render();
    }
}

// Render the basic statistics table from state.analysisResults.basicStats,
// an object mapping a column name to its statistics.
//
// If the results carry no basicStats the table is rendered without rows
// instead of throwing. Column names are HTML-escaped before insertion, and
// a statistic that is not a finite number is shown as "-".
function updateBasicStats() {
    const basicStats = document.getElementById('basicStats');
    const stats = (state.analysisResults && state.analysisResults.basicStats) || {};

    // Column names originate from the uploaded file; escape them for HTML.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    // Two decimals for numbers, a dash for anything missing.
    const formatNumber = (value) =>
        (typeof value === 'number' && Number.isFinite(value) ? value.toFixed(2) : '-');

    const rows = Object.entries(stats).map(([column, stat]) => `
                    <tr>
                        <td class="border px-4 py-2">${escapeHtml(column)}</td>
                        <td class="border px-4 py-2">${formatNumber(stat.mean)}</td>
                        <td class="border px-4 py-2">${formatNumber(stat.median)}</td>
                        <td class="border px-4 py-2">${formatNumber(stat.stdDev)}</td>
                        <td class="border px-4 py-2">${formatNumber(stat.min)}</td>
                        <td class="border px-4 py-2">${formatNumber(stat.max)}</td>
                    </tr>
                `).join('');

    basicStats.innerHTML = `
        <table class="min-w-full">
            <thead>
                <tr>
                    <th class="px-4 py-2">列名</th>
                    <th class="px-4 py-2">均值</th>
                    <th class="px-4 py-2">中位数</th>
                    <th class="px-4 py-2">标准差</th>
                    <th class="px-4 py-2">最小值</th>
                    <th class="px-4 py-2">最大值</th>
                </tr>
            </thead>
            <tbody>
                ${rows}
            </tbody>
        </table>
    `;
}

// Draw the value distribution of every analysed column in #distributionChart.
//
// Reads state.analysisResults.distributions (column name -> { values }).
// All columns are plotted in ONE Plotly.newPlot call as overlaid histogram
// traces; calling newPlot once per column on the same element would replace
// the previous plot each time and leave only the last column visible.
function updateDistributionChart() {
    const distributions = state.analysisResults.distributions || {};
    const columns = Object.keys(distributions);
    if (columns.length === 0) {
        return; // nothing to draw
    }

    const overlaid = columns.length > 1;
    const traces = columns.map((column) => ({
        x: distributions[column].values,
        type: 'histogram',
        name: column,
        // Semi-transparent bars keep overlapping histograms readable.
        opacity: overlaid ? 0.6 : 1
    }));

    Plotly.newPlot('distributionChart', traces, {
        // Keep the per-column title when there is a single column.
        title: overlaid ? '分布图' : `${columns[0]} 分布图`,
        barmode: 'overlay',
        xaxis: { title: '值' },
        yaxis: { title: '频数' }
    });
}

// Plot one line-and-marker series per column in #trendChart, using
// state.analysisResults.trends (column name -> { dates, values }).
function updateTrendChart() {
    const { trends } = state.analysisResults;

    const traces = [];
    for (const column of Object.keys(trends)) {
        const series = trends[column];
        traces.push({
            x: series.dates,
            y: series.values,
            type: 'scatter',
            mode: 'lines+markers',
            name: column
        });
    }

    const layout = {
        title: '趋势分析',
        xaxis: { title: '时间' },
        yaxis: { title: '值' }
    };
    Plotly.newPlot('trendChart', traces, layout);
}

// Render the correlation matrix as an 800x800 heatmap in #correlationChart.
// state.analysisResults.correlation supplies `values` (the z matrix) and
// `columns`, which label both axes.
function updateCorrelationChart() {
    const { values, columns } = state.analysisResults.correlation;

    const heatmap = {
        z: values,
        x: columns,
        y: columns,
        type: 'heatmap',
        colorscale: 'Viridis'
    };
    const layout = {
        title: '相关性分析',
        width: 800,
        height: 800
    };

    Plotly.newPlot('correlationChart', [heatmap], layout);
}

// Request a report from POST /api/report and present it to the user.
//
// format: 'html' opens the report in a new window; any other value
//         ('pdf', 'excel') is offered as a file download.
// Success and failure are reported through showSuccess / showError; the
// function never throws to its caller.
async function generateReport(format) {
    // The API calls the spreadsheet format 'excel', but the file needs a real
    // extension, so map format names to extension + MIME type.
    const xlsxMime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
    const downloadTypes = {
        pdf: { ext: 'pdf', mime: 'application/pdf' },
        excel: { ext: 'xlsx', mime: xlsxMime }
    };

    try {
        const response = await axios.post('/api/report', {
            format,
            charts: ['distribution', 'trend', 'correlation']
        });

        if (format === 'html') {
            // window.open() returns null when a popup blocker intervenes.
            const win = window.open();
            if (!win) {
                throw new Error('弹出窗口被浏览器拦截');
            }
            win.document.write(response.data.result);
            // Close the stream so the new window finishes loading.
            win.document.close();
        } else {
            // Unknown formats keep the previous fallback (spreadsheet MIME type,
            // extension equal to the format name).
            const target = downloadTypes[format] || { ext: format, mime: xlsxMime };

            // NOTE(review): response.data.result is handed to Blob as-is. If the
            // server sends base64 or a download URL it must be decoded/fetched
            // first — confirm against the /api/report handler.
            const blob = new Blob([response.data.result], { type: target.mime });
            const url = window.URL.createObjectURL(blob);

            const a = document.createElement('a');
            a.href = url;
            a.download = `分析报告.${target.ext}`;
            // Some browsers only honour click() on an attached element.
            document.body.appendChild(a);
            a.click();
            a.remove();

            // Revoke on the next tick; revoking synchronously can cancel the
            // download before the browser has started reading the blob.
            setTimeout(() => window.URL.revokeObjectURL(url), 0);
        }

        showSuccess('报告生成成功');
    } catch (error) {
        showError('报告生成失败:' + error.message);
    }
}

// Utility: show a success message to the user.
// message: text to display.
function showSuccess(message) {
    // TODO: implement the notification display logic (currently a no-op).
}

// Utility: show an error message to the user.
// message: text to display.
function showError(message) {
    // TODO: implement the error display logic (currently a no-op).
}

4. 项目测试实现

go 复制代码
// tests/processor_test.go
package tests

import (
    "testing"
    "os"
    "github.com/stretchr/testify/assert"
    "github.com/your/stats/services"
)

// TestDataProcessor exercises the CSV pipeline: loading a file, inferring
// column types and computing per-column statistics.
//
// The subtests share one processor and run in order, so the later ones depend
// on LoadCSV having succeeded; each guards its preconditions so a failure is
// reported as a failed assertion rather than an index-out-of-range panic.
func TestDataProcessor(t *testing.T) {
    // Fixture: a header plus three rows (two numeric columns, one text column).
    testData := `Column1,Column2,Column3
1,2.5,text
2,3.5,sample
3,4.5,data
`
    // The "*" places the random part before the extension, so the temporary
    // file still ends in ".csv" (a pattern without "*" gets the random part
    // appended after ".csv").
    tmpfile, err := os.CreateTemp("", "test-*.csv")
    if err != nil {
        t.Fatal(err)
    }
    defer os.Remove(tmpfile.Name())

    if _, err := tmpfile.WriteString(testData); err != nil {
        // Best-effort close; the write error is the one worth reporting.
        _ = tmpfile.Close()
        t.Fatal(err)
    }
    if err := tmpfile.Close(); err != nil {
        t.Fatal(err)
    }

    processor := services.NewDataProcessor()

    // Loading must yield three columns and three data rows.
    t.Run("LoadCSV", func(t *testing.T) {
        if err := processor.LoadCSV(tmpfile.Name()); !assert.NoError(t, err) {
            return
        }

        dataset := processor.GetDataSet()
        if !assert.NotNil(t, dataset) {
            return
        }
        assert.Equal(t, 3, len(dataset.Columns))
        assert.Equal(t, 3, len(dataset.Rows))
    })

    // Numeric columns are expected to be inferred as "float", text as "string".
    t.Run("DataTypeInference", func(t *testing.T) {
        dataset := processor.GetDataSet()
        if !assert.NotNil(t, dataset) || !assert.Len(t, dataset.Columns, 3) {
            return
        }
        assert.Equal(t, "float", dataset.Columns[0].Type)
        assert.Equal(t, "float", dataset.Columns[1].Type)
        assert.Equal(t, "string", dataset.Columns[2].Type)
    })

    // Column1 holds 1, 2, 3: count 3, mean 2, median 2.
    t.Run("Statistics", func(t *testing.T) {
        if err := processor.CalculateStatistics(); !assert.NoError(t, err) {
            return
        }

        dataset := processor.GetDataSet()
        if !assert.NotNil(t, dataset) || !assert.NotEmpty(t, dataset.Columns) {
            return
        }
        stats := dataset.Columns[0].Stats

        assert.Equal(t, 3, stats.Count)
        assert.InDelta(t, 2.0, stats.Mean, 0.001)
        assert.InDelta(t, 2.0, stats.Median, 0.001)
    })
}

// tests/analyzer_test.go
// TestAnalyzer checks correlation, distribution and outlier analysis on a
// two-column dataset in which Col2 is exactly twice Col1.
//
// Each subtest stops after a failed NoError assertion: the analysis results
// are dereferenced afterwards, and would panic if an error came back together
// with a nil result.
func TestAnalyzer(t *testing.T) {
    dataset := &models.DataSet{
        Columns: []models.Column{
            {Name: "Col1", Type: "float"},
            {Name: "Col2", Type: "float"},
        },
        Rows: []models.Row{
            {Values: []interface{}{"1.0", "2.0"}},
            {Values: []interface{}{"2.0", "4.0"}},
            {Values: []interface{}{"3.0", "6.0"}},
        },
    }

    analyzer := services.NewAnalyzer(dataset)

    // Col2 = 2 * Col1, so the correlation coefficient must be 1.
    t.Run("Correlation", func(t *testing.T) {
        corr, err := analyzer.CalculateCorrelation("Col1", "Col2")
        if !assert.NoError(t, err) {
            return
        }
        assert.InDelta(t, 1.0, corr, 0.001)
    })

    // Col1 = {1, 2, 3}: mean 2, standard deviation 1.
    t.Run("Distribution", func(t *testing.T) {
        dist, err := analyzer.CalculateDistribution("Col1")
        if !assert.NoError(t, err) {
            return
        }
        assert.InDelta(t, 2.0, dist.Mean, 0.001)
        assert.InDelta(t, 1.0, dist.StdDev, 0.001)
    })

    // The sample data contains no outliers.
    t.Run("Outliers", func(t *testing.T) {
        outliers, err := analyzer.PerformOutlierAnalysis("Col1")
        if !assert.NoError(t, err) {
            return
        }
        assert.Equal(t, 0, outliers.OutlierCount)
    })
}

// tests/reporter_test.go
// TestReportGenerator builds a one-column dataset with precomputed statistics
// and checks that HTML, PDF and Excel reports can be generated from it.
func TestReportGenerator(t *testing.T) {
    // The same statistics feed both the dataset column and the analysis result.
    col1Stats := models.Statistics{
        Mean:   2.0,
        Median: 2.0,
        StdDev: 1.0,
    }

    dataset := &models.DataSet{
        Name: "TestData",
        Columns: []models.Column{
            {Name: "Col1", Type: "float", Stats: col1Stats},
        },
    }

    analysis := &models.AnalysisResult{
        DataSetName: "TestData",
        ColumnStats: map[string]models.Statistics{
            "Col1": dataset.Columns[0].Stats,
        },
    }

    reporter := services.NewReportGenerator(dataset, analysis)

    // The HTML report must contain the report title and the dataset name.
    t.Run("HTMLReport", func(t *testing.T) {
        page, genErr := reporter.GenerateHTMLReport()
        assert.NoError(t, genErr)
        assert.Contains(t, page, "数据分析报告")
        assert.Contains(t, page, "TestData")
    })

    // PDF generation must succeed and return a non-nil result.
    t.Run("PDFReport", func(t *testing.T) {
        doc, genErr := reporter.GeneratePDFReport()
        assert.NoError(t, genErr)
        assert.NotNil(t, doc)
    })

    // Excel generation must succeed and return a non-nil result.
    t.Run("ExcelReport", func(t *testing.T) {
        workbook, genErr := reporter.GenerateExcelReport()
        assert.NoError(t, genErr)
        assert.NotNil(t, workbook)
    })
}

// tests/integration_test.go
// TestIntegration drives the HTTP API end to end through an in-process
// router: upload a CSV, run an analysis on it, then generate a report.
func TestIntegration(t *testing.T) {
    // CSV fixture defined locally (the original referenced a variable that is
    // not in scope here). Its header matches the "Col1" column requested by the
    // analysis step below.
    const testData = "Col1,Col2\n1,2.5\n2,3.5\n3,4.5\n"

    router := gin.New()
    handler := handlers.NewWebHandler()
    handler.SetupRoutes(router)

    t.Run("FullAnalysisFlow", func(t *testing.T) {
        // Each step depends on the previous one, so stop at the first failure
        // instead of reporting misleading follow-up errors.

        // 1. Upload the file.
        w := httptest.NewRecorder()
        req := createMultipartRequest(t, "test.csv", testData)
        router.ServeHTTP(w, req)
        if !assert.Equal(t, http.StatusOK, w.Code) {
            return
        }

        // 2. Run the analysis.
        w = httptest.NewRecorder()
        analysisReq := `{"columns":["Col1"],"types":["distribution"]}`
        req = httptest.NewRequest("POST", "/api/analyze",
            bytes.NewBufferString(analysisReq))
        // Declare the body type so content-type-based binding accepts it.
        req.Header.Set("Content-Type", "application/json")
        router.ServeHTTP(w, req)
        if !assert.Equal(t, http.StatusOK, w.Code) {
            return
        }

        // 3. Generate the report.
        w = httptest.NewRecorder()
        reportReq := `{"format":"html","charts":["distribution"]}`
        req = httptest.NewRequest("POST", "/api/report",
            bytes.NewBufferString(reportReq))
        req.Header.Set("Content-Type", "application/json")
        router.ServeHTTP(w, req)
        assert.Equal(t, http.StatusOK, w.Code)
    })
}

// createMultipartRequest builds a POST /api/upload request whose
// multipart/form-data body carries content as a single file part named "file"
// with the given filename.
//
// Any error while assembling the body fails the calling test immediately;
// t.Helper makes the failure point at the caller's line.
func createMultipartRequest(t *testing.T, filename string, content string) *http.Request {
    t.Helper()

    var b bytes.Buffer
    writer := multipart.NewWriter(&b)

    part, err := writer.CreateFormFile("file", filename)
    if err != nil {
        t.Fatal(err)
    }
    if _, err := part.Write([]byte(content)); err != nil {
        t.Fatal(err)
    }
    // Close writes the terminating boundary; a body without it is malformed,
    // so a failure here must not be ignored.
    if err := writer.Close(); err != nil {
        t.Fatal(err)
    }

    req := httptest.NewRequest("POST", "/api/upload", &b)
    req.Header.Set("Content-Type", writer.FormDataContentType())
    return req
}

5. 部署和运维

5.1 项目部署流程图

5.2 Docker配置文件

go 复制代码
# Dockerfile
# ---- Build stage: compile a static Linux binary ----
FROM golang:1.19-alpine AS builder

WORKDIR /app

# git is kept for modules fetched directly from version control. No C
# toolchain is installed: the binary is built with CGO_ENABLED=0 below, so
# gcc/musl-dev would only enlarge the build layer.
RUN apk add --no-cache git

# Copy only the module manifests first so the dependency-download layer stays
# cached until go.mod / go.sum change (copying the whole tree first would
# invalidate it on every source edit).
COPY go.mod go.sum ./
RUN go mod download

# Copy the rest of the project.
COPY . .

# Build.
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o main cmd/main.go

# ---- Runtime stage ----
FROM alpine:3.14

WORKDIR /app

# Copy the compiled binary and the runtime assets from the build stage.
COPY --from=builder /app/main .
COPY --from=builder /app/templates ./templates
COPY --from=builder /app/static ./static

# Directory for uploaded files (mounted as a volume in docker-compose).
RUN mkdir -p /app/uploads

# Runtime configuration.
ENV GIN_MODE=release
ENV PORT=8080

EXPOSE 8080

CMD ["./main"]

# docker-compose.yml
# Runs the analyzer together with Prometheus and Grafana for monitoring.
version: '3.8'

services:
  stats-analyzer:
    build: .
    ports:
      - "8080:8080"
    volumes:
      - ./uploads:/app/uploads
      - ./configs:/app/configs
    environment:
      - GIN_MODE=release
      - PORT=8080
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      # prometheus.yml loads "rules/*.yml" relative to its own directory, so
      # the rules folder must be mounted or no alert rule is ever evaluated.
      - ./rules:/etc/prometheus/rules
    command:
      - --config.file=/etc/prometheus/prometheus.yml
    restart: unless-stopped

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # Overridable from the host environment; falls back to the previous
      # default. Set a real password for anything beyond local use.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
    volumes:
      - grafana-storage:/var/lib/grafana
    depends_on:
      - prometheus
    restart: unless-stopped

volumes:
  grafana-storage:

5.3 监控配置

go 复制代码
# prometheus.yml
# Prometheus server configuration: scrape targets, alert rule files and the
# Alertmanager endpoint.
global:
  # Scrape targets and evaluate rules every 15 seconds.
  scrape_interval: 15s
  evaluation_interval: 15s

# Where firing alerts are sent.
# NOTE(review): no "alertmanager" service appears in the docker-compose file
# shown in this article — confirm it is provided elsewhere.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - alertmanager:9093

# Alert rule files, resolved relative to this configuration file.
rule_files:
  - "rules/*.yml"

scrape_configs:
  # The analyzer itself; the "service" label is what the alert rules select on.
  - job_name: 'stats-analyzer'
    static_configs:
      - targets: ['stats-analyzer:8080']
        labels:
          service: 'stats-analyzer'

  # Host-level metrics.
  # NOTE(review): no "node-exporter" service appears in the docker-compose file
  # shown in this article — confirm it is provided elsewhere.
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

# rules/alert_rules.yml
# Alert rules for the stats-analyzer service.
groups:
  - name: stats_analyzer_alerts
    rules:
      # Availability: the scrape target has been down for one minute.
      - alert: ServiceDown
        expr: up{service="stats-analyzer"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "统计分析服务不可用"
          description: "服务已停止运行超过1分钟"

      # Error rate: share of 5xx responses among all responses.
      # Both sides are aggregated with sum(); dividing the raw vectors would
      # match each 5xx series only against itself and always yield 1.
      - alert: HighErrorRate
        expr: sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "服务错误率过高"
          description: "5分钟内错误率超过5%"

      # Latency: 95th percentile over all series. Buckets are summed by "le"
      # so one overall quantile is computed instead of one per label set.
      - alert: SlowResponse
        expr: histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m]))) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "服务响应过慢"
          description: "95%的请求响应时间超过1秒"

      # Memory: process RSS as a percentage of host memory.
      # The two metrics come from different jobs and share no label set, so
      # they are matched with on() group_left(); a plain "/" returns nothing.
      # Assumes a single node-exporter target — TODO confirm.
      - alert: HighMemoryUsage
        expr: process_resident_memory_bytes{service="stats-analyzer"} / on() group_left() node_memory_MemTotal_bytes * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "内存使用率过高"
          description: "内存使用率超过80%"

# grafana/dashboards/stats_analyzer.json
{
  "annotations": {
    "list": []
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": 1,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "title": "请求数量",
      "type": "timeseries"
    }
  ],
  "refresh": "5s",
  "schemaVersion": 38,
  "style": "dark",
  "tags": ["stats-analyzer"],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "统计分析服务监控",
  "uid": "stats_analyzer",
  "version": 1,
  "weekStart": ""
}

6. 项目文档

6.1 API文档

go 复制代码
# 统计分析工具 API 文档

## 基本信息

- 基础路径: `/api/v1`
- 支持格式: JSON
- 认证方式: 无

## API 端点

### 1. 文件上传

#### POST /upload

上传CSV文件进行分析。

**请求参数:**

- Content-Type: multipart/form-data
- 参数名:file

**响应:**

```json
{
    "status": "success",
    "columns": [
        {
            "name": "Column1",
            "type": "float"
        },
        {
            "name": "Column2",
            "type": "string"
        }
    ]
}
```

2. 数据分析

POST /analyze

执行数据分析。

请求参数:

json 复制代码
{
    "columns": ["Column1", "Column2"],
    "types": ["distribution", "correlation", "trend"]
}

响应:

json 复制代码
{
    "status": "success",
    "results": {
        "distributions": {
            "Column1": {
                "mean": 45.6,
                "median": 42.0,
                "stdDev": 12.3,
                "bins": [
                    {"start": 0, "end": 10, "count": 5},
                    {"start": 10, "end": 20, "count": 8}
                ]
            }
        },
        "correlations": {
            "Column1": {
                "Column2": 0.85
            }
        },
        "trends": {
            "Column1": [
                {"date": "2024-01-01", "value": 42.1},
                {"date": "2024-01-02", "value": 43.5}
            ]
        }
    }
}

3. 报告生成

POST /report

生成分析报告。

请求参数:

json 复制代码
{
    "format": "html",  // 支持: html, pdf, excel
    "charts": ["distribution", "trend", "correlation"]
}

响应:

  • Format: html

    json 复制代码
    {
        "status": "success",
        "content": "<html>...</html>"
    }
  • Format: pdf/excel

    json 复制代码
    {
        "status": "success",
        "download_url": "/downloads/report_123.pdf"
    }

4. 列统计信息

GET /stats/:column

获取指定列的统计信息。

响应:

json 复制代码
{
    "status": "success",
    "stats": {
        "count": 1000,
        "mean": 45.6,
        "median": 42.0,
        "mode": 40.0,
        "stdDev": 12.3,
        "min": 10.0,
        "max": 90.0,
        "percentiles": {
            "25": 35.0,
            "50": 42.0,
            "75": 55.0,
            "95": 70.0
        }
    }
}

5. 异常值检测

POST /outliers

检测指定列的异常值。

请求参数:

json 复制代码
{
    "column": "Column1",
    "method": "iqr"  // 支持: iqr, zscore
}

响应:

json 复制代码
{
    "status": "success",
    "outliers": {
        "count": 5,
        "values": [
            {"index": 10, "value": 150.0, "score": 3.5},
            {"index": 20, "value": 5.0, "score": -2.8}
        ],
        "bounds": {
            "lower": 10.0,
            "upper": 90.0
        }
    }
}

错误码说明

错误码 说明
400 请求参数错误
404 资源不存在
415 不支持的文件类型
500 服务器内部错误

使用示例

Python 示例

python 复制代码
import requests

# Upload the file. The context manager closes the handle even when the request
# fails, and raise_for_status() surfaces HTTP errors instead of printing an
# error payload as if it were a result.
with open('data.csv', 'rb') as csv_file:
    response = requests.post('http://localhost:8080/api/v1/upload', files={'file': csv_file})
response.raise_for_status()
print(response.json())

# Run the analysis on the uploaded data.
analysis_req = {
    'columns': ['Column1'],
    'types': ['distribution']
}
response = requests.post('http://localhost:8080/api/v1/analyze', json=analysis_req)
response.raise_for_status()
print(response.json())

JavaScript 示例

javascript 复制代码
// Parse a fetch Response as JSON, rejecting on a non-2xx status
// (fetch itself only rejects on network failure).
const parseJson = (response) => {
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
    }
    return response.json();
};

// Upload the file (`file` is a File/Blob supplied by the caller,
// e.g. taken from an <input type="file">).
const formData = new FormData();
formData.append('file', file);

fetch('/api/v1/upload', {
    method: 'POST',
    body: formData
})
    .then(parseJson)
    .then((data) => {
        console.log(data);

        // Start the analysis only after the upload has completed; firing both
        // requests at once races the analysis against the upload.
        return fetch('/api/v1/analyze', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                columns: ['Column1'],
                types: ['distribution']
            })
        });
    })
    .then(parseJson)
    .then((data) => console.log(data))
    .catch((error) => console.error(error));

7. 项目总结

7.1 功能特点

  1. 数据处理能力

    • 支持大规模CSV文件处理
    • 自动数据类型推断
    • 智能数据清洗
    • 高效数据转换
  2. 分析功能

    • 全面的统计分析
    • 高级数据挖掘
    • 可视化图表生成
    • 自动报告生成
  3. 用户体验

    • 直观的Web界面
    • 交互式数据探索
    • 灵活的配置选项
    • 多格式报告导出
  4. 系统性能

    • 并发处理支持
    • 内存优化设计
    • 缓存加速
    • 异步任务处理

7.2 技术亮点

  1. Go语言优势运用

    • goroutine并发处理
    • channel通信机制
    • 接口设计模式
    • 高效内存管理
  2. 架构设计

    • 模块化组织
    • 松耦合设计
    • 可扩展接口
    • 清晰的代码结构
  3. 工程实践

    • 完整的测试覆盖
    • 持续集成部署
    • 监控告警机制
    • 容器化部署

7.3 后续优化方向

  1. 功能增强

    • 支持更多数据源
    • 添加机器学习模型
    • 扩展分析方法
    • 优化报告模板
  2. 性能提升

    • 分布式处理
    • 数据库优化
    • 缓存策略改进
    • 算法优化
  3. 用户体验

    • 界面美化
    • 操作流程优化
    • 响应速度提升
    • 移动端支持
  4. 运维支持

    • 自动化部署
    • 监控完善
    • 日志分析
    • 故障恢复

本项目展示了一个完整的Go语言统计分析工具的设计和实现过程,涵盖了从数据处理到可视化报告生成的全流程。通过合理的架构设计和模块划分,实现了高效、可靠的数据分析功能,为用户提供了便捷的数据分析工具。


怎么样今天的内容还满意吗?再次感谢观众老爷的观看,关注GZH:凡人的AI工具箱,回复666,送您价值199的AI大礼包。最后,祝您早日实现财务自由,还请给个赞,谢谢!

相关推荐
自然数e4 小时前
C++多线程【线程管控】之线程转移以及线程数量和ID
开发语言·c++·算法·多线程
Arva .4 小时前
ConcurrentHashMap 的线程安全实现
java·开发语言
禾风wyh5 小时前
(ICLR 2019)APPNP传播用 PageRank,不用神经网络!
人工智能·深度学习·神经网络
昂子的博客5 小时前
Redis缓存 更新策略 双写一致 缓存穿透 击穿 雪崩 解决方案... 一篇文章带你学透
java·数据库·redis·后端·spring·缓存
Dxy12393102165 小时前
Python为什么要使用可迭代对象
开发语言·python
xixixi777775 小时前
了解一下APM工具——就像给软件系统装的“全身CT”,能实时透视从用户点击到后端数据库的每个环节性能,精准定位哪里慢、为什么慢
数据库·安全·数据采集·apm·日志监控
Keep_Trying_Go5 小时前
论文STEERER人群计数,车辆计数以及农作物计数算法详解(pytorch)
人工智能·pytorch·python
gzu_015 小时前
基于昇腾 配置pytorch环境
人工智能·pytorch·python
陈 洪 伟5 小时前
AI理论知识系统复习(6):梯度饱和、梯度消失、梯度爆炸
人工智能
云在Steven5 小时前
在线确定性算法与自适应启发式在虚拟机动态整合中的竞争分析与性能优化
人工智能·算法·性能优化