golang运维平台实战,服务树,日志监控,任务执行,分布式探测

Golang运维平台实战项目:从零构建现代化运维系统

本文将深入探讨基于Golang的运维平台实战开发,通过核心代码展示如何构建一个功能完整的运维管理系统。

项目架构与核心设计

1. 项目结构设计

go 复制代码
project/
├── cmd/
│   └── server/
│       └── main.go
├── internal/
│   ├── handler/
│   ├── service/
│   ├── repository/
│   └── model/
├── pkg/
│   ├── config/
│   ├── cron/
│   ├── database/
│   ├── logger/
│   ├── middleware/
│   └── ssh/
└── go.mod

2. 核心模型定义

go 复制代码
// internal/model/server.go
package model

import (
    "time"
)

// Server is the persisted record of one managed host. The json tags name the
// API fields and the gorm tags declare the column constraints.
type Server struct {
    // ID is the primary key.
    ID          uint      `json:"id" gorm:"primarykey"`
    // Name is a required label, at most 100 characters.
    Name        string    `json:"name" gorm:"size:100;not null"`
    // IP is required and unique. size:15 is exactly the length of the longest
    // dotted IPv4 address.
    // NOTE(review): an IPv6 literal would not fit — confirm IPv4-only is intended.
    IP          string    `json:"ip" gorm:"size:15;not null;uniqueIndex"`
    // Port defaults to 22 at the database level.
    Port        int       `json:"port" gorm:"default:22"`
    // OS is a free-form description, at most 50 characters.
    OS          string    `json:"os" gorm:"size:50"`
    // Status defaults to "unknown" at the database level.
    Status      string    `json:"status" gorm:"size:20;default:unknown"`
    CreatedAt   time.Time `json:"created_at"`
    UpdatedAt   time.Time `json:"updated_at"`
}

// ServerStats is one sample of resource usage for a server.
type ServerStats struct {
    ID          uint      `json:"id"`
    // ServerID identifies the sampled server; the column is indexed.
    ServerID    uint      `json:"server_id" gorm:"index"`
    // The three usage fields are presumably percentages in the range 0-100 —
    // TODO confirm against the code that fills them.
    CPUUsage    float64   `json:"cpu_usage"`
    MemoryUsage float64   `json:"memory_usage"`
    DiskUsage   float64   `json:"disk_usage"`
    // CreatedAt is the time the sample was taken.
    CreatedAt   time.Time `json:"created_at"`
}

3. 配置管理模块

go 复制代码
// pkg/config/config.go
package config

import (
    "github.com/spf13/viper"
    "log"
)

// Config mirrors the layout of the configuration file. Each nested struct maps
// to the top-level key named in its mapstructure tag.
type Config struct {
    // Server holds the HTTP listener settings.
    Server struct {
        Port int `mapstructure:"port"`
    } `mapstructure:"server"`
    
    // Database holds the connection parameters for the backing database.
    Database struct {
        Host     string `mapstructure:"host"`
        Port     int    `mapstructure:"port"`
        User     string `mapstructure:"user"`
        Password string `mapstructure:"password"`
        DBName   string `mapstructure:"dbname"`
    } `mapstructure:"database"`
    
    // SSH holds the settings for outbound SSH connections.
    SSH struct {
        PrivateKeyPath string `mapstructure:"private_key_path"`
        // Timeout has no unit in this declaration.
        // NOTE(review): presumably seconds — confirm where it is consumed.
        Timeout        int    `mapstructure:"timeout"`
    } `mapstructure:"ssh"`
}

// Load reads a YAML file named "config" from the working directory or from
// ./config and decodes it into a Config.
//
// server.port defaults to 8080 and ssh.timeout to 30 when the file leaves them
// unset. A missing or unreadable file, or a decoding failure, terminates the
// process through log.Fatalf.
func Load() *Config {
	// Register defaults first; they only apply to keys the file does not set.
	viper.SetDefault("server.port", 8080)
	viper.SetDefault("ssh.timeout", 30)

	viper.SetConfigType("yaml")
	viper.SetConfigName("config")
	for _, dir := range []string{".", "./config"} {
		viper.AddConfigPath(dir)
	}

	err := viper.ReadInConfig()
	if err != nil {
		log.Fatalf("Error reading config file: %v", err)
	}

	conf := new(Config)
	if err = viper.Unmarshal(conf); err != nil {
		log.Fatalf("Unable to decode into struct: %v", err)
	}
	return conf
}

核心功能实现

1. SSH连接池管理

go 复制代码
// pkg/ssh/pool.go
package ssh

import (
    "fmt"
    "net"
    "sync"
    "time"
    
    "golang.org/x/crypto/ssh"
)

// Connection is a pooled SSH client together with the time it was last handed
// out. The embedded *ssh.Client exposes the client's methods directly.
type Connection struct {
    *ssh.Client
    // LastUsed is compared against the pool's idle limit to decide expiry.
    LastUsed time.Time
}

// Pool caches one SSH connection per "host:port" key.
type Pool struct {
    // connections maps "host:port" to the cached connection.
    connections map[string]*Connection
    // mutex guards connections.
    mutex       sync.RWMutex
    // maxIdle is how long a connection may sit unused before it is closed.
    maxIdle     time.Duration
    // config is the client configuration used for every dial.
    config      *ssh.ClientConfig
}

// NewPool builds an empty connection pool that authenticates as "root" with
// the private key stored at privateKeyPath.
//
// maxIdle is the idle period after which a cached connection is discarded.
// The error from parsing the key is returned unchanged.
//
// NOTE(review): host keys are not verified (ssh.InsecureIgnoreHostKey), which
// leaves connections open to man-in-the-middle attacks — replace with a
// known-hosts callback before production use.
func NewPool(privateKeyPath string, maxIdle time.Duration) (*Pool, error) {
	signer, err := parsePrivateKey(privateKeyPath)
	if err != nil {
		return nil, err
	}

	pool := &Pool{
		connections: make(map[string]*Connection),
		maxIdle:     maxIdle,
	}
	pool.config = &ssh.ClientConfig{
		User:            "root",
		Auth:            []ssh.AuthMethod{ssh.PublicKeys(signer)},
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
		Timeout:         30 * time.Second,
	}
	return pool, nil
}

// Get returns a live connection to host:port, reusing the cached one when it
// has been idle for less than maxIdle and dialing a new one otherwise.
//
// An expired cached connection is closed and replaced. The returned error
// wraps the dial failure.
//
// The whole operation runs under the exclusive lock: both the reuse path
// (which updates LastUsed) and the expiry path (which deletes from the map)
// mutate shared state, so a read lock is not sufficient. Holding the lock
// across the dial also guarantees that two callers asking for the same host
// never create two clients and leak one of them.
func (p *Pool) Get(host string, port int) (*Connection, error) {
	key := fmt.Sprintf("%s:%d", host, port)

	p.mutex.Lock()
	defer p.mutex.Unlock()

	if cached, ok := p.connections[key]; ok {
		if time.Since(cached.LastUsed) < p.maxIdle {
			cached.LastUsed = time.Now()
			return cached, nil
		}
		// The connection has expired: close it and drop it from the pool.
		// The close error is ignored because the client is being discarded.
		_ = cached.Close()
		delete(p.connections, key)
	}

	// JoinHostPort brackets IPv6 literals, which plain "%s:%d" does not.
	address := net.JoinHostPort(host, fmt.Sprintf("%d", port))
	client, err := ssh.Dial("tcp", address, p.config)
	if err != nil {
		return nil, fmt.Errorf("failed to dial: %w", err)
	}

	conn := &Connection{
		Client:   client,
		LastUsed: time.Now(),
	}
	p.connections[key] = conn
	return conn, nil
}

// Cleanup closes and removes every cached connection that has been idle for
// longer than maxIdle. It holds the exclusive lock for the whole sweep.
func (p *Pool) Cleanup() {
	p.mutex.Lock()
	defer p.mutex.Unlock()

	for addr, c := range p.connections {
		if time.Since(c.LastUsed) <= p.maxIdle {
			continue // still fresh
		}
		c.Close()
		delete(p.connections, addr)
	}
}

2. 服务器监控服务

go 复制代码
// internal/service/monitor.go
package service

import (
    "bufio"
    "fmt"
    "strconv"
    "strings"
    "time"
    
    "golang.org/x/crypto/ssh"
)

// MonitorService gathers host metrics and runs commands over pooled SSH
// connections.
//
// NOTE(review): this file imports golang.org/x/crypto/ssh under the name ssh,
// and that package has no Pool type; presumably the project's own connection
// pool package is meant here — confirm and alias one of the two imports.
type MonitorService struct {
    sshPool *ssh.Pool
}

// NewMonitorService returns a MonitorService that obtains its SSH connections
// from sshPool.
func NewMonitorService(sshPool *ssh.Pool) *MonitorService {
	svc := new(MonitorService)
	svc.sshPool = sshPool
	return svc
}

// GetServerStats connects to host:port over SSH and samples CPU, memory and
// disk usage.
//
// It returns a ServerStats stamped with the current time, or the first error
// encountered, wrapped with the name of the probe that failed.
//
// Every probe runs in its own session: an SSH session accepts exactly one
// command, so sharing a single session across the three probes makes the
// second one fail.
func (s *MonitorService) GetServerStats(host string, port int) (*ServerStats, error) {
	conn, err := s.sshPool.Get(host, port)
	if err != nil {
		return nil, err
	}

	// collect opens a dedicated session, runs one probe in it and closes it.
	collect := func(name string, probe func(*ssh.Session) (float64, error)) (float64, error) {
		session, err := conn.NewSession()
		if err != nil {
			return 0, fmt.Errorf("opening session for %s on %s: %w", name, host, err)
		}
		defer session.Close()

		value, err := probe(session)
		if err != nil {
			return 0, fmt.Errorf("reading %s on %s: %w", name, host, err)
		}
		return value, nil
	}

	cpuUsage, err := collect("cpu usage", s.getCPUUsage)
	if err != nil {
		return nil, err
	}

	memUsage, err := collect("memory usage", s.getMemoryUsage)
	if err != nil {
		return nil, err
	}

	diskUsage, err := collect("disk usage", s.getDiskUsage)
	if err != nil {
		return nil, err
	}

	return &ServerStats{
		CPUUsage:    cpuUsage,
		MemoryUsage: memUsage,
		DiskUsage:   diskUsage,
		CreatedAt:   time.Now(),
	}, nil
}

// getCPUUsage runs a shell pipeline on session that reads the aggregate "cpu"
// line of /proc/stat and prints ($2+$4)*100/($2+$4+$5), then parses that
// output as a float64.
//
// The command error or the parse error is returned unchanged.
func (s *MonitorService) getCPUUsage(session *ssh.Session) (float64, error) {
	const cmd = "grep 'cpu ' /proc/stat | awk '{usage=($2+$4)*100/($2+$4+$5)} END {print usage}'"

	raw, err := session.Output(cmd)
	if err != nil {
		return 0, err
	}

	text := strings.TrimSpace(string(raw))
	return strconv.ParseFloat(text, 64)
}

// getMemoryUsage runs `free` on session, prints column 3 divided by column 2
// of the "Mem" line times 100, and parses that output as a float64.
//
// The command error or the parse error is returned unchanged.
func (s *MonitorService) getMemoryUsage(session *ssh.Session) (float64, error) {
	const cmd = "free | grep Mem | awk '{print $3/$2 * 100.0}'"

	raw, err := session.Output(cmd)
	if err != nil {
		return 0, err
	}

	text := strings.TrimSpace(string(raw))
	return strconv.ParseFloat(text, 64)
}

// getDiskUsage runs `df /` on session, takes the fifth column of the second
// output line with its "%" sign stripped, and parses it as a float64.
//
// The command error or the parse error is returned unchanged.
func (s *MonitorService) getDiskUsage(session *ssh.Session) (float64, error) {
	const cmd = "df / | awk 'NR==2 {print $5}' | sed 's/%//'"

	raw, err := session.Output(cmd)
	if err != nil {
		return 0, err
	}

	text := strings.TrimSpace(string(raw))
	return strconv.ParseFloat(text, 64)
}

// ExecuteBatchCommand runs command on every host in hosts, one after another,
// connecting to each on the given port.
//
// The returned map is keyed by host. Its value is the command's combined
// stdout and stderr on success, or a message describing the step that failed.
// Per-host failures never abort the batch, and the returned error is always
// nil.
func (s *MonitorService) ExecuteBatchCommand(hosts []string, port int, command string) (map[string]string, error) {
	// runOn executes the command on a single host and renders the outcome.
	runOn := func(host string) string {
		conn, err := s.sshPool.Get(host, port)
		if err != nil {
			return fmt.Sprintf("连接失败: %v", err)
		}

		session, err := conn.NewSession()
		if err != nil {
			return fmt.Sprintf("创建会话失败: %v", err)
		}
		defer session.Close()

		output, err := session.CombinedOutput(command)
		if err != nil {
			return fmt.Sprintf("执行失败: %v", err)
		}
		return string(output)
	}

	results := make(map[string]string)
	for i := range hosts {
		results[hosts[i]] = runOn(hosts[i])
	}
	return results, nil
}

3. Web API处理器

go 复制代码
// internal/handler/server.go
package handler

import (
    "net/http"
    "strconv"
    
    "github.com/gin-gonic/gin"
    "github.com/your-username/ops-platform/internal/model"
    "github.com/your-username/ops-platform/internal/service"
)

// ServerHandler groups the HTTP handlers for the server endpoints.
type ServerHandler struct {
    // serverService reads and writes stored server records.
    serverService *service.ServerService
    // monitorService collects stats and executes commands on servers.
    monitorService *service.MonitorService
}

// NewServerHandler returns a ServerHandler backed by the two given services.
func NewServerHandler(serverService *service.ServerService, monitorService *service.MonitorService) *ServerHandler {
	handler := new(ServerHandler)
	handler.serverService = serverService
	handler.monitorService = monitorService
	return handler
}

// CreateServer decodes a model.Server from the JSON request body and stores
// it.
//
// It responds 400 when the body cannot be bound, 500 when the service fails to
// create the record, and 201 with the stored server otherwise.
func (h *ServerHandler) CreateServer(c *gin.Context) {
	server := model.Server{}

	bindErr := c.ShouldBindJSON(&server)
	if bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	createErr := h.serverService.Create(&server)
	if createErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": createErr.Error()})
		return
	}

	c.JSON(http.StatusCreated, server)
}

// GetServerStats returns a fresh resource-usage sample for the server named by
// the ":id" path parameter.
//
// It responds 400 when the id is not a valid unsigned integer, 404 when no
// such server is stored, 500 when the sample cannot be collected, and 200 with
// the stats otherwise.
func (h *ServerHandler) GetServerStats(c *gin.Context) {
	// ParseUint rejects negative and out-of-range ids up front; converting a
	// signed Atoi result to uint would silently wrap them to huge values.
	id, err := strconv.ParseUint(c.Param("id"), 10, 0)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "无效的服务器ID"})
		return
	}

	server, err := h.serverService.GetByID(uint(id))
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "服务器不存在"})
		return
	}

	stats, err := h.monitorService.GetServerStats(server.IP, server.Port)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	c.JSON(http.StatusOK, stats)
}

// BatchCommand runs one shell command on a set of stored servers.
//
// The JSON body carries "server_ids" and "command". It responds 400 when the
// body cannot be bound or either field is empty, 500 when the servers cannot
// be loaded or a batch fails, and 200 with a host-to-output map otherwise.
//
// Each server is contacted on its own stored port rather than a fixed 22, so
// hosts running SSH on a non-default port are reachable. Servers are grouped
// by port because ExecuteBatchCommand takes a single port per call.
func (h *ServerHandler) BatchCommand(c *gin.Context) {
	var request struct {
		ServerIDs []uint `json:"server_ids"`
		Command   string `json:"command"`
	}

	if err := c.ShouldBindJSON(&request); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	if len(request.ServerIDs) == 0 || request.Command == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "server_ids 和 command 不能为空"})
		return
	}

	servers, err := h.serverService.GetByIDs(request.ServerIDs)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	hostsByPort := make(map[int][]string)
	for _, server := range servers {
		port := server.Port
		if port == 0 {
			port = 22 // unset port: fall back to the model's default
		}
		hostsByPort[port] = append(hostsByPort[port], server.IP)
	}

	results := make(map[string]string, len(servers))
	for port, hosts := range hostsByPort {
		partial, err := h.monitorService.ExecuteBatchCommand(hosts, port, request.Command)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		for host, output := range partial {
			results[host] = output
		}
	}

	c.JSON(http.StatusOK, results)
}

4. 定时任务调度器

go 复制代码
// pkg/cron/scheduler.go
package cron

import (
    "log"
    "time"
    
    "github.com/robfig/cron/v3"
    "github.com/your-username/ops-platform/internal/service"
)

// Scheduler runs the platform's periodic background jobs.
type Scheduler struct {
    // cron is the underlying job runner.
    cron        *cron.Cron
    // monitorService collects, stores and prunes server stats.
    monitorService *service.MonitorService
    // serverService lists the servers to sample.
    serverService  *service.ServerService
}

// NewScheduler returns a Scheduler with a fresh cron runner and the two given
// services. No job is registered until Start is called.
func NewScheduler(monitorService *service.MonitorService, serverService *service.ServerService) *Scheduler {
	sched := &Scheduler{cron: cron.New()}
	sched.monitorService = monitorService
	sched.serverService = serverService
	return sched
}

// Start registers the periodic jobs and starts the cron runner.
//
// Two jobs are registered: stats collection every five minutes and old-data
// cleanup daily at 02:00. A job whose schedule fails to register is logged and
// skipped instead of being dropped silently; the remaining jobs still run.
func (s *Scheduler) Start() {
	jobs := []struct {
		spec string
		name string
		run  func()
	}{
		{"*/5 * * * *", "collectServerStats", s.collectServerStats}, // every 5 minutes
		{"0 2 * * *", "cleanupOldData", s.cleanupOldData},           // daily at 02:00
	}

	for _, job := range jobs {
		if _, err := s.cron.AddFunc(job.spec, job.run); err != nil {
			log.Printf("注册定时任务 %s (%s) 失败: %v", job.name, job.spec, err)
		}
	}

	s.cron.Start()
	log.Println("定时任务调度器已启动")
}

// Stop halts the cron runner and then logs that the scheduler has stopped.
func (s *Scheduler) Stop() {
	// The deferred log line runs after the runner has been told to stop.
	defer log.Println("定时任务调度器已停止")
	s.cron.Stop()
}

// collectServerStats samples every stored server and saves the result.
//
// Each server is handled in its own goroutine, and this function returns
// without waiting for them. Failures are logged per server and never stop the
// others.
func (s *Scheduler) collectServerStats() {
	log.Println("开始收集服务器状态...")

	servers, err := s.serverService.GetAll()
	if err != nil {
		log.Printf("获取服务器列表失败: %v", err)
		return
	}

	for _, server := range servers {
		// Capture a per-iteration copy rather than passing a typed parameter:
		// this avoids naming the element type, whose package this file does
		// not import, and keeps the closure correct before Go 1.22.
		server := server
		go func() {
			stats, err := s.monitorService.GetServerStats(server.IP, server.Port)
			if err != nil {
				log.Printf("收集服务器 %s 状态失败: %v", server.Name, err)
				return
			}

			stats.ServerID = server.ID
			if err := s.monitorService.SaveStats(stats); err != nil {
				log.Printf("保存服务器状态失败: %v", err)
			}
		}()
	}
}

// cleanupOldData asks the monitor service to remove stats older than one
// calendar month and logs any failure.
func (s *Scheduler) cleanupOldData() {
	log.Println("开始清理旧数据...")

	// Keep one month of data.
	cutoff := time.Now().AddDate(0, -1, 0)

	err := s.monitorService.CleanupOldStats(cutoff)
	if err == nil {
		return
	}
	log.Printf("清理旧数据失败: %v", err)
}

5. 主程序入口

go 复制代码
// cmd/server/main.go
package main

import (
    "context"
    "fmt"
    "log"
    "net/http"
    "os"
    "os/signal"
    "syscall"
    "time"
    
    "github.com/gin-gonic/gin"
    "github.com/your-username/ops-platform/internal/handler"
    "github.com/your-username/ops-platform/internal/repository"
    "github.com/your-username/ops-platform/internal/service"
    "github.com/your-username/ops-platform/pkg/config"
    "github.com/your-username/ops-platform/pkg/cron"
    "github.com/your-username/ops-platform/pkg/database"
    "github.com/your-username/ops-platform/pkg/ssh"
)

// main wires the platform together and serves the HTTP API until SIGINT or
// SIGTERM arrives.
//
// Startup order: configuration, database, SSH pool, repositories, services,
// handlers, scheduler, router, HTTP server. On a termination signal the server
// is shut down gracefully: it stops accepting connections and in-flight
// requests get up to ten seconds to finish before the process exits.
func main() {
	// Load configuration.
	cfg := config.Load()

	// Initialise the database.
	db, err := database.Init(&cfg.Database)
	if err != nil {
		log.Fatalf("数据库初始化失败: %v", err)
	}

	// Initialise the SSH connection pool.
	sshPool, err := ssh.NewPool(cfg.SSH.PrivateKeyPath, 10*time.Minute)
	if err != nil {
		log.Fatalf("SSH连接池初始化失败: %v", err)
	}

	// Repository layer.
	serverRepo := repository.NewServerRepository(db)

	// Service layer.
	serverService := service.NewServerService(serverRepo)
	monitorService := service.NewMonitorService(sshPool)

	// HTTP handlers.
	serverHandler := handler.NewServerHandler(serverService, monitorService)

	// Scheduled jobs.
	scheduler := cron.NewScheduler(monitorService, serverService)
	scheduler.Start()
	defer scheduler.Stop()

	// Gin routes.
	router := gin.Default()

	api := router.Group("/api/v1")
	{
		servers := api.Group("/servers")
		{
			servers.POST("", serverHandler.CreateServer)
			servers.GET("", serverHandler.ListServers)
			servers.GET("/:id", serverHandler.GetServer)
			servers.GET("/:id/stats", serverHandler.GetServerStats)
			servers.POST("/batch-command", serverHandler.BatchCommand)
		}
	}

	// Start the HTTP server.
	srv := &http.Server{
		Addr:    fmt.Sprintf(":%d", cfg.Server.Port),
		Handler: router,
	}

	go func() {
		log.Printf("服务器启动在端口 %d", cfg.Server.Port)
		// ErrServerClosed is the normal result of Shutdown, not a failure.
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Fatalf("服务器启动失败: %v", err)
		}
	}()

	// Wait for an interrupt or termination signal.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit

	log.Println("正在关闭服务器...")

	// Drain in-flight requests instead of dropping them when main returns.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := srv.Shutdown(ctx); err != nil {
		log.Printf("服务器关闭失败: %v", err)
	}
}

项目特色功能

1. 实时WebSocket监控

go 复制代码
// internal/handler/websocket.go
package handler

import (
    "encoding/json"
    "time"
    
    "github.com/gin-gonic/gin"
    "github.com/gorilla/websocket"
)

// upgrader turns HTTP requests into WebSocket connections.
//
// NOTE(review): CheckOrigin returns true unconditionally, so a page from any
// origin can open a socket. Restrict it to trusted origins before exposing
// this endpoint to browsers.
var upgrader = websocket.Upgrader{
    CheckOrigin: func(r *http.Request) bool {
        return true
    },
}

// MonitorWebSocket upgrades the request to a WebSocket and pushes a stats
// sample for the server named by the ":id" path parameter every five seconds.
//
// Errors are reported to the client as {"error": ...} JSON messages. An
// invalid id or an unknown server ends the connection after one such message.
// A failed sample is reported and the loop keeps going. The handler returns as
// soon as any write fails, so a disconnected client cannot leave the loop
// running forever.
func (h *ServerHandler) MonitorWebSocket(c *gin.Context) {
	conn, err := upgrader.Upgrade(c.Writer, c.Request, nil)
	if err != nil {
		return
	}
	defer conn.Close()

	serverID, err := strconv.ParseUint(c.Param("id"), 10, 0)
	if err != nil {
		// Best effort: the connection is closed immediately afterwards.
		_ = conn.WriteJSON(gin.H{"error": "无效的服务器ID"})
		return
	}

	server, err := h.serverService.GetByID(uint(serverID))
	if err != nil {
		// Best effort: the connection is closed immediately afterwards.
		_ = conn.WriteJSON(gin.H{"error": "服务器不存在"})
		return
	}

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	for range ticker.C {
		stats, err := h.monitorService.GetServerStats(server.IP, server.Port)
		if err != nil {
			// Report the failure; stop only if the client is gone.
			if werr := conn.WriteJSON(gin.H{"error": err.Error()}); werr != nil {
				return
			}
			continue
		}

		data, err := json.Marshal(stats)
		if err != nil {
			if werr := conn.WriteJSON(gin.H{"error": err.Error()}); werr != nil {
				return
			}
			continue
		}

		if err := conn.WriteMessage(websocket.TextMessage, data); err != nil {
			return
		}
	}
}

2. 配置文件示例

yaml 复制代码
# config.yaml
server:
  port: 8080

database:
  host: localhost
  port: 5432
  user: opsuser
  password: opspassword
  dbname: ops_platform

ssh:
  private_key_path: /path/to/private/key
  timeout: 30

logging:
  level: info
  file: /var/log/ops-platform.log

总结

这个Golang运维平台实战项目展示了:

  1. 现代化架构设计:清晰的分层架构,模块化设计
  2. 高性能连接管理:SSH连接池避免频繁建立连接的开销
  3. 实时监控能力:支持WebSocket实时数据推送
  4. 批量操作支持:高效的批量命令执行机制
  5. 可扩展性:易于添加新的监控指标和运维功能

通过具体的代码实现,展示了如何用Golang构建一个功能完整、性能优异的运维管理平台,为实际生产环境提供了可靠的技术基础。

相关推荐
梦想很大很大1 小时前
使用 Go + Gin + Fx 构建工程化后端服务模板(gin-app 实践)
前端·后端·go
lekami_兰6 小时前
MySQL 长事务:藏在业务里的性能 “隐形杀手”
数据库·mysql·go·长事务
却尘9 小时前
一篇小白也能看懂的 Go 字符串拼接 & Builder & cap 全家桶
后端·go
ん贤10 小时前
一次批量删除引发的死锁,最终我选择不加锁
数据库·安全·go·死锁
mtngt111 天前
AI DDD重构实践
go
Grassto2 天前
12 go.sum 是如何保证依赖安全的?校验机制源码解析
安全·golang·go·哈希算法·go module
Grassto4 天前
11 Go Module 缓存机制详解
开发语言·缓存·golang·go·go module
程序设计实验室5 天前
2025年的最后一天,分享我使用go语言开发的电子书转换工具网站
go
我的golang之路果然有问题5 天前
使用 Hugo + GitHub Pages + PaperMod 主题 + Obsidian 搭建开发博客
golang·go·github·博客·个人开发·个人博客·hugo
啊汉7 天前
古文观芷App搜索方案深度解析:打造极致性能的古文搜索引擎
go·软件随想