golang运维平台实战,服务树,日志监控,任务执行,分布式探测

Golang运维平台实战项目:从零构建现代化运维系统

本文将深入探讨基于Golang的运维平台实战开发,通过核心代码展示如何构建一个功能完整的运维管理系统。

项目架构与核心设计

1. 项目结构设计

go 复制代码
project/
├── cmd/
│   └── server/
│       └── main.go
├── internal/
│   ├── handler/
│   ├── service/
│   ├── repository/
│   └── model/
├── pkg/
│   ├── config/
│   ├── cron/
│   ├── database/
│   ├── logger/
│   ├── middleware/
│   └── ssh/
└── go.mod

2. 核心模型定义

go 复制代码
// internal/model/server.go
package model

import (
    "time"
)

// Server describes a managed host that the platform reaches over SSH.
// The struct doubles as the GORM model and as the JSON payload of the API.
type Server struct {
    ID   uint   `json:"id" gorm:"primarykey"`
    Name string `json:"name" gorm:"size:100;not null"`
    // IP is the address used to dial the host. The column is sized to 45
    // characters so that IPv6 textual addresses fit; the previous limit of 15
    // could only hold a dotted-quad IPv4 address.
    IP string `json:"ip" gorm:"size:45;not null;uniqueIndex"`
    // Port is the SSH port; the database default is 22.
    Port      int       `json:"port" gorm:"default:22"`
    OS        string    `json:"os" gorm:"size:50"`
    Status    string    `json:"status" gorm:"size:20;default:unknown"`
    CreatedAt time.Time `json:"created_at"`
    UpdatedAt time.Time `json:"updated_at"`
}

// ServerStats is one resource-usage sample taken from a server.
// NOTE(review): the usage fields presumably hold percentages in the range
// 0-100, matching the probe commands in the monitor service; confirm that the
// service fills this exact type.
type ServerStats struct {
    ID          uint      `json:"id"`
    ServerID    uint      `json:"server_id" gorm:"index"` // owning Server.ID; indexed for per-server queries
    CPUUsage    float64   `json:"cpu_usage"`
    MemoryUsage float64   `json:"memory_usage"`
    DiskUsage   float64   `json:"disk_usage"`
    CreatedAt   time.Time `json:"created_at"` // time the sample was taken
}

3. 配置管理模块

go 复制代码
// pkg/config/config.go
package config

import (
    "github.com/spf13/viper"
    "log"
)

// Config is the decoded application configuration. The mapstructure tags
// mirror the keys of the YAML configuration file.
type Config struct {
    // Server holds the HTTP listener settings.
    Server struct {
        Port int `mapstructure:"port"`
    } `mapstructure:"server"`
    
    // Database holds the connection parameters for the backing database.
    Database struct {
        Host     string `mapstructure:"host"`
        Port     int    `mapstructure:"port"`
        User     string `mapstructure:"user"`
        Password string `mapstructure:"password"`
        DBName   string `mapstructure:"dbname"`
    } `mapstructure:"database"`
    
    // SSH holds the settings for outgoing SSH connections.
    SSH struct {
        PrivateKeyPath string `mapstructure:"private_key_path"`
        // Timeout is presumably expressed in seconds (default 30) — TODO confirm.
        Timeout        int    `mapstructure:"timeout"`
    } `mapstructure:"ssh"`
}

// Load reads the application configuration through the global viper instance
// and decodes it into a Config.
//
// A YAML file named "config" is searched for in the working directory and then
// in ./config. Defaults are registered for server.port (8080) and ssh.timeout
// (30). The process exits via log.Fatalf when the file cannot be read or
// decoded, so the returned pointer is never nil.
func Load() *Config {
    // Defaults only apply to keys that the file leaves unset.
    viper.SetDefault("server.port", 8080)
    viper.SetDefault("ssh.timeout", 30)

    viper.SetConfigName("config")
    viper.SetConfigType("yaml")
    for _, dir := range []string{".", "./config"} {
        viper.AddConfigPath(dir)
    }

    readErr := viper.ReadInConfig()
    if readErr != nil {
        log.Fatalf("Error reading config file: %v", readErr)
    }

    cfg := new(Config)
    if decodeErr := viper.Unmarshal(cfg); decodeErr != nil {
        log.Fatalf("Unable to decode into struct: %v", decodeErr)
    }

    return cfg
}

核心功能实现

1. SSH连接池管理

go 复制代码
// pkg/ssh/pool.go
package ssh

import (
    "fmt"
    "net"
    "sync"
    "time"
    
    "golang.org/x/crypto/ssh"
)

// Connection is a pooled SSH client together with the time it was last handed
// out by the pool. The embedded *ssh.Client exposes the client's methods
// (for example NewSession and Close) directly on the Connection.
type Connection struct {
    *ssh.Client
    LastUsed time.Time // refreshed by Pool.Get; compared against the pool's idle limit
}

// Pool caches one SSH connection per "host:port" key and drops connections
// that have been idle for longer than maxIdle.
type Pool struct {
    connections map[string]*Connection // keyed by "host:port"
    mutex       sync.RWMutex           // guards connections
    maxIdle     time.Duration          // idle time after which a connection is considered expired
    config      *ssh.ClientConfig      // client settings shared by every dial
}

// NewPool builds an empty connection pool whose clients authenticate with the
// private key stored at privateKeyPath. maxIdle is the idle period after which
// a cached connection is treated as expired.
//
// An error is returned when the private key cannot be parsed.
//
// NOTE(review): every connection logs in as "root", uses a fixed 30 second
// dial timeout, and skips host key verification (ssh.InsecureIgnoreHostKey),
// which leaves connections open to man-in-the-middle attacks. Consider a
// known_hosts based HostKeyCallback before production use.
func NewPool(privateKeyPath string, maxIdle time.Duration) (*Pool, error) {
    signer, err := parsePrivateKey(privateKeyPath)
    if err != nil {
        return nil, err
    }

    pool := &Pool{
        connections: make(map[string]*Connection),
        maxIdle:     maxIdle,
    }
    pool.config = &ssh.ClientConfig{
        User:            "root",
        Auth:            []ssh.AuthMethod{ssh.PublicKeys(signer)},
        HostKeyCallback: ssh.InsecureIgnoreHostKey(),
        Timeout:         30 * time.Second,
    }

    return pool, nil
}

// Get returns a live SSH connection to host:port, reusing a cached one when it
// has been used within maxIdle and dialing a new one otherwise.
//
// The pool map and the LastUsed timestamps are only touched while holding the
// write lock; the previous version mutated both under a read lock, which is a
// data race. The network dial happens outside the lock so that one slow host
// does not block lookups for every other host.
//
// An error is returned when the dial fails.
func (p *Pool) Get(host string, port int) (*Connection, error) {
    // JoinHostPort brackets IPv6 literals, which a plain "%s:%d" does not.
    address := net.JoinHostPort(host, fmt.Sprint(port))

    p.mutex.Lock()
    if conn, ok := p.connections[address]; ok {
        if time.Since(conn.LastUsed) < p.maxIdle {
            conn.LastUsed = time.Now()
            p.mutex.Unlock()
            return conn, nil
        }
        // The cached connection has expired: drop it and close it. The close
        // error is ignored because the connection is being discarded anyway.
        delete(p.connections, address)
        conn.Close()
    }
    p.mutex.Unlock()

    client, err := ssh.Dial("tcp", address, p.config)
    if err != nil {
        return nil, fmt.Errorf("failed to dial %s: %w", address, err)
    }

    p.mutex.Lock()
    defer p.mutex.Unlock()

    // Another goroutine may have connected to the same host while this one was
    // dialing. Keep the connection that is already published and discard ours
    // so that no client is leaked.
    if existing, ok := p.connections[address]; ok {
        client.Close()
        existing.LastUsed = time.Now()
        return existing, nil
    }

    conn := &Connection{
        Client:   client,
        LastUsed: time.Now(),
    }
    p.connections[address] = conn
    return conn, nil
}

// Cleanup closes and forgets every pooled connection that has been idle for
// longer than maxIdle. The whole sweep runs under the write lock.
func (p *Pool) Cleanup() {
    p.mutex.Lock()
    defer p.mutex.Unlock()

    for addr, pooled := range p.connections {
        if time.Since(pooled.LastUsed) <= p.maxIdle {
            continue // still fresh, keep it
        }
        pooled.Close()
        delete(p.connections, addr)
    }
}

2. 服务器监控服务

go 复制代码
// internal/service/monitor.go
package service

import (
    "bufio"
    "fmt"
    "strconv"
    "strings"
    "time"
    
    "golang.org/x/crypto/ssh"
)

// MonitorService collects resource metrics from servers and runs commands on
// them through a shared SSH connection pool.
//
// NOTE(review): this file imports "golang.org/x/crypto/ssh" under the name
// ssh, but that package has no Pool type; the field presumably refers to the
// project's own pkg/ssh pool. The two packages need distinct import names —
// confirm and fix the import block.
type MonitorService struct {
    sshPool *ssh.Pool
}

// NewMonitorService returns a MonitorService that obtains its SSH connections
// from sshPool.
func NewMonitorService(sshPool *ssh.Pool) *MonitorService {
    svc := new(MonitorService)
    svc.sshPool = sshPool
    return svc
}

// GetServerStats connects to host:port over SSH and samples CPU, memory and
// root-filesystem usage.
//
// Each metric is collected in its own SSH session: a session accepts only a
// single command, so sharing one session across the three probes made every
// probe after the first fail.
//
// The returned sample has CreatedAt set to the collection time; ServerID is
// left for the caller to fill in. An error is returned when the connection, a
// session, or any probe fails.
func (s *MonitorService) GetServerStats(host string, port int) (*ServerStats, error) {
    conn, err := s.sshPool.Get(host, port)
    if err != nil {
        return nil, err
    }

    // collect runs one probe inside a fresh session and always closes it.
    collect := func(metric string, probe func(*ssh.Session) (float64, error)) (float64, error) {
        session, err := conn.NewSession()
        if err != nil {
            return 0, fmt.Errorf("open session for %s: %w", metric, err)
        }
        defer session.Close()

        value, err := probe(session)
        if err != nil {
            return 0, fmt.Errorf("collect %s: %w", metric, err)
        }
        return value, nil
    }

    cpuUsage, err := collect("cpu usage", s.getCPUUsage)
    if err != nil {
        return nil, err
    }

    memUsage, err := collect("memory usage", s.getMemoryUsage)
    if err != nil {
        return nil, err
    }

    diskUsage, err := collect("disk usage", s.getDiskUsage)
    if err != nil {
        return nil, err
    }

    return &ServerStats{
        CPUUsage:    cpuUsage,
        MemoryUsage: memUsage,
        DiskUsage:   diskUsage,
        CreatedAt:   time.Now(),
    }, nil
}

// getCPUUsage runs a /proc/stat based probe in session and parses its output
// as a percentage. The figure is derived from the aggregate "cpu" counters
// (user+system over user+system+idle), which are cumulative rather than
// sampled over an interval.
//
// The session must not have run a command yet. An error is returned when the
// command fails or its output is not a number.
func (s *MonitorService) getCPUUsage(session *ssh.Session) (float64, error) {
    const probe = "grep 'cpu ' /proc/stat | awk '{usage=($2+$4)*100/($2+$4+$5)} END {print usage}'"

    raw, err := session.Output(probe)
    if err != nil {
        return 0, err
    }

    text := strings.TrimSpace(string(raw))
    return strconv.ParseFloat(text, 64)
}

// getMemoryUsage runs `free` in session and parses used/total memory as a
// percentage.
//
// The session must not have run a command yet. An error is returned when the
// command fails or its output is not a number.
func (s *MonitorService) getMemoryUsage(session *ssh.Session) (float64, error) {
    const probe = "free | grep Mem | awk '{print $3/$2 * 100.0}'"

    raw, err := session.Output(probe)
    if err != nil {
        return 0, err
    }

    text := strings.TrimSpace(string(raw))
    return strconv.ParseFloat(text, 64)
}

// getDiskUsage runs `df /` in session and parses the use percentage of the
// root filesystem (the "%" suffix is stripped remotely by sed).
//
// The session must not have run a command yet. An error is returned when the
// command fails or its output is not a number.
func (s *MonitorService) getDiskUsage(session *ssh.Session) (float64, error) {
    const probe = "df / | awk 'NR==2 {print $5}' | sed 's/%//'"

    raw, err := session.Output(probe)
    if err != nil {
        return 0, err
    }

    text := strings.TrimSpace(string(raw))
    return strconv.ParseFloat(text, 64)
}

// ExecuteBatchCommand runs command on every host in hosts, one after another,
// connecting to each on the given port.
//
// The result maps each host to the combined stdout/stderr of the command, or
// to a message describing why the connection, session, or execution failed.
// Per-host failures are reported in the map only; the returned error is always
// nil.
func (s *MonitorService) ExecuteBatchCommand(hosts []string, port int, command string) (map[string]string, error) {
    // runOn executes the command on a single host and renders the outcome.
    runOn := func(host string) string {
        conn, err := s.sshPool.Get(host, port)
        if err != nil {
            return fmt.Sprintf("连接失败: %v", err)
        }

        session, err := conn.NewSession()
        if err != nil {
            return fmt.Sprintf("创建会话失败: %v", err)
        }
        defer session.Close()

        output, err := session.CombinedOutput(command)
        if err != nil {
            return fmt.Sprintf("执行失败: %v", err)
        }
        return string(output)
    }

    results := make(map[string]string)
    for _, host := range hosts {
        results[host] = runOn(host)
    }

    return results, nil
}

3. Web API处理器

go 复制代码
// internal/handler/server.go
package handler

import (
    "net/http"
    "strconv"
    
    "github.com/gin-gonic/gin"
    "github.com/your-username/ops-platform/internal/model"
    "github.com/your-username/ops-platform/internal/service"
)

// ServerHandler bundles the HTTP handlers for server resources together with
// the services they delegate to.
type ServerHandler struct {
    serverService  *service.ServerService  // server lookup and persistence
    monitorService *service.MonitorService // metrics collection and remote commands
}

// NewServerHandler returns a ServerHandler backed by the given services.
func NewServerHandler(serverService *service.ServerService, monitorService *service.MonitorService) *ServerHandler {
    h := new(ServerHandler)
    h.serverService = serverService
    h.monitorService = monitorService
    return h
}

// CreateServer decodes a server from the JSON request body and stores it.
//
// Responses: 201 with the stored server, 400 when the body cannot be bound,
// 500 when the service fails to create the record.
func (h *ServerHandler) CreateServer(c *gin.Context) {
    var payload model.Server

    bindErr := c.ShouldBindJSON(&payload)
    if bindErr != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
        return
    }

    createErr := h.serverService.Create(&payload)
    if createErr != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": createErr.Error()})
        return
    }

    c.JSON(http.StatusCreated, payload)
}

// GetServerStats returns a fresh resource-usage sample for the server named by
// the ":id" path parameter.
//
// Responses: 200 with the sample, 400 when the id is not an unsigned integer,
// 404 when the server lookup fails, 500 when collecting the metrics fails.
func (h *ServerHandler) GetServerStats(c *gin.Context) {
    // ParseUint rejects negative ids outright. With Atoi followed by a uint
    // conversion, "-1" wrapped around to a huge id and surfaced as a 404.
    id, err := strconv.ParseUint(c.Param("id"), 10, 0)
    if err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "无效的服务器ID"})
        return
    }

    server, err := h.serverService.GetByID(uint(id))
    if err != nil {
        c.JSON(http.StatusNotFound, gin.H{"error": "服务器不存在"})
        return
    }

    stats, err := h.monitorService.GetServerStats(server.IP, server.Port)
    if err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
        return
    }

    c.JSON(http.StatusOK, stats)
}

// BatchCommand runs a shell command on a set of servers and returns the
// per-host output as a JSON object keyed by server IP.
//
// Request body: {"server_ids": [...], "command": "..."}.
// Responses: 200 with the results, 400 when the body cannot be bound, 500 when
// the server lookup or the execution call fails.
//
// Each server is contacted on its own configured SSH port; the previous
// version always used port 22, so servers listening elsewhere could never be
// reached.
//
// NOTE(review): this endpoint executes arbitrary commands on remote hosts.
// No authentication or authorization is visible in this handler — confirm
// that a middleware protects the route.
func (h *ServerHandler) BatchCommand(c *gin.Context) {
    const defaultSSHPort = 22

    var request struct {
        ServerIDs []uint `json:"server_ids"`
        Command   string `json:"command"`
    }

    if err := c.ShouldBindJSON(&request); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
        return
    }

    servers, err := h.serverService.GetByIDs(request.ServerIDs)
    if err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
        return
    }

    // ExecuteBatchCommand takes a single port per call, so group the hosts by
    // port and issue one call per group.
    hostsByPort := make(map[int][]string)
    for _, server := range servers {
        port := server.Port
        if port == 0 {
            // An unset port falls back to the SSH default.
            port = defaultSSHPort
        }
        hostsByPort[port] = append(hostsByPort[port], server.IP)
    }

    results := make(map[string]string, len(servers))
    for port, hosts := range hostsByPort {
        partial, err := h.monitorService.ExecuteBatchCommand(hosts, port, request.Command)
        if err != nil {
            c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
            return
        }
        for host, output := range partial {
            results[host] = output
        }
    }

    c.JSON(http.StatusOK, results)
}

4. 定时任务调度器

go 复制代码
// pkg/cron/scheduler.go
package cron

import (
    "log"
    "time"
    
    "github.com/robfig/cron/v3"
    "github.com/your-username/ops-platform/internal/service"
)

// Scheduler owns the cron runner and the services that its periodic jobs use.
type Scheduler struct {
    cron           *cron.Cron
    monitorService *service.MonitorService // collects and stores server metrics
    serverService  *service.ServerService  // lists the servers to monitor
}

// NewScheduler returns a Scheduler with a fresh cron runner and the given
// services. No job is registered until Start is called.
func NewScheduler(monitorService *service.MonitorService, serverService *service.ServerService) *Scheduler {
    sched := new(Scheduler)
    sched.cron = cron.New()
    sched.monitorService = monitorService
    sched.serverService = serverService
    return sched
}

// Start registers the periodic jobs and starts the cron runner.
//
// Jobs: server metrics are collected every 5 minutes, and old samples are
// purged daily at 02:00. A job that fails to register is logged instead of
// being dropped silently; the remaining jobs still run.
func (s *Scheduler) Start() {
    // Collect server status every 5 minutes.
    if _, err := s.cron.AddFunc("*/5 * * * *", s.collectServerStats); err != nil {
        log.Printf("注册服务器状态收集任务失败: %v", err)
    }

    // Purge old data every day at 02:00.
    if _, err := s.cron.AddFunc("0 2 * * *", s.cleanupOldData); err != nil {
        log.Printf("注册旧数据清理任务失败: %v", err)
    }

    s.cron.Start()
    log.Println("定时任务调度器已启动")
}

// Stop halts the cron runner so that no further jobs are scheduled, then logs
// that the scheduler has stopped. It does not wait for jobs that are already
// running.
func (s *Scheduler) Stop() {
    // The value returned by Stop is intentionally discarded.
    _ = s.cron.Stop()

    log.Println("定时任务调度器已停止")
}

// collectServerStats samples every known server concurrently and stores the
// results. Failures are logged per server and do not stop the other probes.
//
// Unlike the previous fire-and-forget version, at most maxConcurrentProbes
// probes run at once and the job returns only after all of them have
// finished, so no goroutine outlives the job. The goroutine captures the loop
// variable through a closure rather than naming model.Server, a package this
// file does not import.
func (s *Scheduler) collectServerStats() {
    const maxConcurrentProbes = 10

    log.Println("开始收集服务器状态...")

    servers, err := s.serverService.GetAll()
    if err != nil {
        log.Printf("获取服务器列表失败: %v", err)
        return
    }

    slots := make(chan struct{}, maxConcurrentProbes) // counting semaphore
    done := make(chan struct{}, len(servers))         // one completion token per server

    for _, server := range servers {
        server := server // per-iteration copy; required before Go 1.22

        slots <- struct{}{} // blocks while maxConcurrentProbes probes are in flight
        go func() {
            defer func() {
                <-slots
                done <- struct{}{}
            }()

            stats, err := s.monitorService.GetServerStats(server.IP, server.Port)
            if err != nil {
                log.Printf("收集服务器 %s 状态失败: %v", server.Name, err)
                return
            }

            stats.ServerID = server.ID
            if err := s.monitorService.SaveStats(stats); err != nil {
                log.Printf("保存服务器状态失败: %v", err)
            }
        }()
    }

    // Wait for every probe before the job returns.
    for range servers {
        <-done
    }
}

// cleanupOldData asks the monitor service to purge samples older than one
// month and logs a failure if the purge does not succeed.
func (s *Scheduler) cleanupOldData() {
    log.Println("开始清理旧数据...")

    // Keep one month of history.
    cutoff := time.Now().AddDate(0, -1, 0)

    err := s.monitorService.CleanupOldStats(cutoff)
    if err == nil {
        return
    }
    log.Printf("清理旧数据失败: %v", err)
}

5. 主程序入口

go 复制代码
// cmd/server/main.go
package main

import (
    "context"
    "fmt"
    "log"
    "net/http"
    "os"
    "os/signal"
    "syscall"
    "time"
    
    "github.com/gin-gonic/gin"
    "github.com/your-username/ops-platform/internal/handler"
    "github.com/your-username/ops-platform/internal/repository"
    "github.com/your-username/ops-platform/internal/service"
    "github.com/your-username/ops-platform/pkg/config"
    "github.com/your-username/ops-platform/pkg/cron"
    "github.com/your-username/ops-platform/pkg/database"
    "github.com/your-username/ops-platform/pkg/ssh"
)

// main wires the application together: configuration, database, SSH pool,
// services, HTTP handlers and the cron scheduler. It then serves the REST API
// until SIGINT or SIGTERM arrives.
//
// On a termination signal the HTTP server is shut down gracefully, giving
// in-flight requests up to shutdownTimeout to finish; the previous version
// returned immediately and cut active requests off.
func main() {
    const shutdownTimeout = 10 * time.Second

    // Load configuration.
    cfg := config.Load()

    // Initialise the database.
    db, err := database.Init(&cfg.Database)
    if err != nil {
        log.Fatalf("数据库初始化失败: %v", err)
    }

    // Initialise the SSH connection pool.
    sshPool, err := ssh.NewPool(cfg.SSH.PrivateKeyPath, 10*time.Minute)
    if err != nil {
        log.Fatalf("SSH连接池初始化失败: %v", err)
    }

    // Repository layer.
    serverRepo := repository.NewServerRepository(db)

    // Service layer.
    serverService := service.NewServerService(serverRepo)
    monitorService := service.NewMonitorService(sshPool)

    // HTTP handlers.
    serverHandler := handler.NewServerHandler(serverService, monitorService)

    // Periodic jobs.
    scheduler := cron.NewScheduler(monitorService, serverService)
    scheduler.Start()
    defer scheduler.Stop()

    // Gin routes.
    router := gin.Default()

    api := router.Group("/api/v1")
    {
        servers := api.Group("/servers")
        {
            servers.POST("", serverHandler.CreateServer)
            servers.GET("", serverHandler.ListServers)
            servers.GET("/:id", serverHandler.GetServer)
            servers.GET("/:id/stats", serverHandler.GetServerStats)
            servers.POST("/batch-command", serverHandler.BatchCommand)
        }
    }

    // HTTP server. ReadHeaderTimeout bounds how long a client may take to send
    // its request headers; no write timeout is set so that long-lived
    // responses are not cut off.
    srv := &http.Server{
        Addr:              fmt.Sprintf(":%d", cfg.Server.Port),
        Handler:           router,
        ReadHeaderTimeout: 10 * time.Second,
    }

    go func() {
        log.Printf("服务器启动在端口 %d", cfg.Server.Port)
        if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
            log.Fatalf("服务器启动失败: %v", err)
        }
    }()

    // Block until an interrupt or termination signal arrives.
    quit := make(chan os.Signal, 1)
    signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
    <-quit

    log.Println("正在关闭服务器...")

    // Stop accepting new connections and drain active requests.
    ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
    defer cancel()
    if err := srv.Shutdown(ctx); err != nil {
        log.Printf("服务器关闭失败: %v", err)
    }
}

项目特色功能

1. 实时WebSocket监控

go 复制代码
// internal/handler/websocket.go
package handler

import (
    "encoding/json"
    "time"
    
    "github.com/gin-gonic/gin"
    "github.com/gorilla/websocket"
)

// upgrader turns HTTP requests into WebSocket connections.
//
// NOTE(review): CheckOrigin accepts every origin, so any web page can open a
// socket to this endpoint from a visitor's browser (cross-site WebSocket
// hijacking). Restrict it to trusted origins before exposing the endpoint.
var upgrader = websocket.Upgrader{
    CheckOrigin: func(r *http.Request) bool {
        return true
    },
}

// MonitorWebSocket upgrades the request to a WebSocket and pushes a fresh
// resource-usage sample for the server named by ":id" every 5 seconds.
//
// Samples are sent as JSON text messages. Lookup and collection problems are
// reported to the client as {"error": "..."} messages. The handler returns
// when the client disconnects or a write fails.
//
// Changes from the earlier version: an invalid id is reported instead of being
// treated as id 0, failed writes on the error path end the loop instead of
// being ignored, marshal errors are handled, and a reader goroutine processes
// incoming frames so that a client close is noticed promptly.
func (h *ServerHandler) MonitorWebSocket(c *gin.Context) {
    conn, err := upgrader.Upgrade(c.Writer, c.Request, nil)
    if err != nil {
        return
    }
    defer conn.Close()

    serverID, err := strconv.ParseUint(c.Param("id"), 10, 0)
    if err != nil {
        // Best effort: the connection is closed right after this message.
        _ = conn.WriteJSON(gin.H{"error": "无效的服务器ID"})
        return
    }

    server, err := h.serverService.GetByID(uint(serverID))
    if err != nil {
        // Best effort: the connection is closed right after this message.
        _ = conn.WriteJSON(gin.H{"error": "服务器不存在"})
        return
    }

    // The connection must be read for close and ping frames to be processed.
    // The reader ends as soon as a read fails, which also happens when the
    // deferred conn.Close runs, so the goroutine cannot outlive the handler
    // for long.
    closed := make(chan struct{})
    go func() {
        defer close(closed)
        for {
            if _, _, err := conn.ReadMessage(); err != nil {
                return
            }
        }
    }()

    ticker := time.NewTicker(5 * time.Second)
    defer ticker.Stop()

    for {
        select {
        case <-closed:
            return
        case <-ticker.C:
            stats, err := h.monitorService.GetServerStats(server.IP, server.Port)
            if err != nil {
                if writeErr := conn.WriteJSON(gin.H{"error": err.Error()}); writeErr != nil {
                    return
                }
                continue
            }

            data, err := json.Marshal(stats)
            if err != nil {
                if writeErr := conn.WriteJSON(gin.H{"error": err.Error()}); writeErr != nil {
                    return
                }
                continue
            }

            if err := conn.WriteMessage(websocket.TextMessage, data); err != nil {
                return
            }
        }
    }
}

2. 配置文件示例

yaml 复制代码
# config.yaml
server:
  port: 8080

database:
  host: localhost
  port: 5432
  user: opsuser
  password: opspassword
  dbname: ops_platform

ssh:
  private_key_path: /path/to/private/key
  timeout: 30

logging:
  level: info
  file: /var/log/ops-platform.log

总结

这个Golang运维平台实战项目展示了:

  1. 现代化架构设计:清晰的分层架构,模块化设计
  2. 高性能连接管理:SSH连接池避免频繁建立连接的开销
  3. 实时监控能力:支持WebSocket实时数据推送
  4. 批量操作支持:高效的批量命令执行机制
  5. 可扩展性:易于添加新的监控指标和运维功能

通过具体的代码实现,展示了如何用Golang构建一个功能完整、性能优异的运维管理平台,为实际生产环境提供了可靠的技术基础。

相关推荐
喵个咪7 小时前
go-kratos-admin 快速上手指南:从环境搭建到启动服务(Windows/macOS/Linux 通用)
vue.js·go
码一行7 小时前
Eino AI 实战: Eino 的文档加载与解析
后端·go
码一行7 小时前
Eino AI 实战:DuckDuckGo 搜索工具 V1 与 V2
后端·go
代码扳手8 小时前
Golang 高效内网文件传输实战:零拷贝、断点续传与 Protobuf 指令解析(含完整源码)
后端·go
zhuyasen20 小时前
Go Web 开发利器:如何让你的 Gin 服务拥有 Nginx 般的静态文件处理能力?
nginx·go·gin
爷_20 小时前
Golang: sqlc 和 goose 最佳实践
后端·go·全栈
喵个咪1 天前
go-kratos-admin 技术栈深度解析:为什么选 Golang+Vue3 这套组合?
vue.js·go
码一行1 天前
Go.1.25.4 和 Go.1.24.10 发布了!!
后端·go
码一行1 天前
从0到1用Go撸一个AI应用?Eino框架让你效率翻倍!
后端·go