Golang运维平台实战项目:从零构建现代化运维系统
本文将深入探讨基于Golang的运维平台实战开发,通过核心代码展示如何构建一个功能完整的运维管理系统。
项目架构与核心设计
1. 项目结构设计
go
project/
├── cmd/
│   └── server/
│       └── main.go
├── internal/
│   ├── handler/
│   ├── service/
│   ├── repository/
│   └── model/
├── pkg/
│   ├── config/
│   ├── cron/
│   ├── database/
│   ├── logger/
│   ├── middleware/
│   └── ssh/
└── go.mod
2. 核心模型定义
go
// internal/model/server.go
package model
import (
"time"
)
// Server is the persisted record of a managed host.
//
// The struct tags drive both JSON (de)serialisation and the GORM column
// definitions.
type Server struct {
	// ID is the primary key.
	ID uint `json:"id" gorm:"primarykey"`
	// Name is a required display name (at most 100 characters).
	Name string `json:"name" gorm:"size:100;not null"`
	// IP is the required, unique address of the host. The column is 45
	// characters wide so that IPv6 literals fit as well as dotted-quad IPv4
	// (which needs at most 15).
	IP string `json:"ip" gorm:"size:45;not null;uniqueIndex"`
	// Port is the SSH port; the database default is 22.
	Port int `json:"port" gorm:"default:22"`
	// OS is a free-form operating system label.
	OS string `json:"os" gorm:"size:50"`
	// Status is a short state label; the database default is "unknown".
	Status string `json:"status" gorm:"size:20;default:unknown"`
	// CreatedAt and UpdatedAt are record timestamps.
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}
// ServerStats is one sample of resource usage for a server.
// NOTE(review): the monitoring snippet later in this document fills the
// three usage fields with values its shell commands scale by 100, so they
// look like percentages (0-100) — confirm before relying on the unit.
type ServerStats struct {
// ID identifies the sample.
ID uint `json:"id"`
// ServerID references the Server the sample belongs to (indexed).
ServerID uint `json:"server_id" gorm:"index"`
// CPUUsage, MemoryUsage and DiskUsage hold the measured usage values.
CPUUsage float64 `json:"cpu_usage"`
MemoryUsage float64 `json:"memory_usage"`
DiskUsage float64 `json:"disk_usage"`
// CreatedAt is the time the sample was taken.
CreatedAt time.Time `json:"created_at"`
}
3. 配置管理模块
go
// pkg/config/config.go
package config
import (
"github.com/spf13/viper"
"log"
)
// Config is the application configuration decoded from the config file.
// The mapstructure tags name the keys each field is read from.
type Config struct {
// Server holds HTTP listener settings.
Server struct {
Port int `mapstructure:"port"`
} `mapstructure:"server"`
// Database holds the connection parameters for the database.
Database struct {
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
User string `mapstructure:"user"`
Password string `mapstructure:"password"`
DBName string `mapstructure:"dbname"`
} `mapstructure:"database"`
// SSH holds settings for outbound SSH connections.
// NOTE(review): Timeout is loaded here, but the pool constructor shown
// later in this document hard-codes its own 30s dial timeout — confirm
// whether this value is meant to be wired through.
SSH struct {
PrivateKeyPath string `mapstructure:"private_key_path"`
Timeout int `mapstructure:"timeout"`
} `mapstructure:"ssh"`
}
// Load reads "config.yaml" from the working directory or ./config using the
// package-level viper instance and decodes it into a Config.
//
// Defaults are registered for server.port (8080) and ssh.timeout (30).
// Any read or decode failure terminates the process via log.Fatalf, so the
// returned pointer is never nil.
func Load() *Config {
	viper.SetConfigName("config")
	viper.SetConfigType("yaml")
	for _, dir := range []string{".", "./config"} {
		viper.AddConfigPath(dir)
	}

	viper.SetDefault("server.port", 8080)
	viper.SetDefault("ssh.timeout", 30)

	readErr := viper.ReadInConfig()
	if readErr != nil {
		log.Fatalf("Error reading config file: %v", readErr)
	}

	loaded := new(Config)
	decodeErr := viper.Unmarshal(loaded)
	if decodeErr != nil {
		log.Fatalf("Unable to decode into struct: %v", decodeErr)
	}
	return loaded
}
核心功能实现
1. SSH连接池管理
go
// pkg/ssh/pool.go
package ssh
import (
"fmt"
"net"
"sync"
"time"
"golang.org/x/crypto/ssh"
)
// Connection is a pooled SSH client together with the time it was last
// handed out by the pool.
type Connection struct {
// The embedded client exposes the ssh.Client methods directly.
*ssh.Client
// LastUsed is compared against the pool's maxIdle to decide expiry.
LastUsed time.Time
}
// Pool caches one SSH connection per "host:port" key.
type Pool struct {
// connections maps the "host:port" key to its cached connection.
connections map[string]*Connection
// mutex is taken by Get and Cleanup around access to connections.
mutex sync.RWMutex
// maxIdle is how long a connection may go unused before it is discarded.
maxIdle time.Duration
// config is the client configuration used for every dial.
config *ssh.ClientConfig
}
// NewPool builds an empty connection pool whose connections authenticate as
// "root" with the private key at privateKeyPath and expire after maxIdle of
// inactivity. It returns the key-parsing error unchanged if the key cannot
// be loaded.
func NewPool(privateKeyPath string, maxIdle time.Duration) (*Pool, error) {
	signer, err := parsePrivateKey(privateKeyPath)
	if err != nil {
		return nil, err
	}

	pool := &Pool{
		connections: make(map[string]*Connection),
		maxIdle:     maxIdle,
	}
	pool.config = &ssh.ClientConfig{
		User: "root",
		Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)},
		// NOTE(review): host keys are not verified, which leaves every
		// connection open to man-in-the-middle attacks. Replace with a
		// known_hosts-backed callback before production use.
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
		Timeout:         30 * time.Second,
	}
	return pool, nil
}
// Get returns a pooled SSH connection to host:port, dialing a new one when
// none is cached or the cached one has been idle for maxIdle or longer.
//
// All reads and writes of the connection map and of LastUsed happen under
// the write lock; the previous version mutated both while holding only the
// read lock, which is a data race. The dial itself runs without the lock so
// that a slow or unreachable host does not block callers for other hosts.
//
// It returns an error wrapping the dial failure when the host cannot be
// reached or authentication fails.
func (p *Pool) Get(host string, port int) (*Connection, error) {
	// JoinHostPort brackets IPv6 literals; for IPv4 and host names the
	// result is the same "host:port" string as before.
	key := net.JoinHostPort(host, fmt.Sprint(port))

	p.mutex.Lock()
	cached, found := p.connections[key]
	if found && time.Since(cached.LastUsed) < p.maxIdle {
		cached.LastUsed = time.Now()
		p.mutex.Unlock()
		return cached, nil
	}
	if found {
		// Expired: drop it from the pool before releasing the lock.
		delete(p.connections, key)
	}
	p.mutex.Unlock()

	if found {
		// Best-effort close of the expired client; there is nothing useful
		// to do with a close error here.
		_ = cached.Close()
	}

	client, err := ssh.Dial("tcp", key, p.config)
	if err != nil {
		return nil, fmt.Errorf("failed to dial: %w", err)
	}

	p.mutex.Lock()
	defer p.mutex.Unlock()

	// Another goroutine may have connected to the same host while we were
	// dialing; keep its connection and discard ours so nothing leaks.
	if winner, ok := p.connections[key]; ok {
		_ = client.Close()
		winner.LastUsed = time.Now()
		return winner, nil
	}

	conn := &Connection{
		Client:   client,
		LastUsed: time.Now(),
	}
	p.connections[key] = conn
	return conn, nil
}
// Cleanup closes and removes every pooled connection that has been idle for
// longer than maxIdle. It holds the write lock for the whole sweep.
func (p *Pool) Cleanup() {
	p.mutex.Lock()
	defer p.mutex.Unlock()

	for addr, pooled := range p.connections {
		if idle := time.Since(pooled.LastUsed); idle <= p.maxIdle {
			continue
		}
		pooled.Close()
		delete(p.connections, addr)
	}
}
2. 服务器监控服务
go
// internal/service/monitor.go
package service
import (
"bufio"
"fmt"
"strconv"
"strings"
"time"
"golang.org/x/crypto/ssh"
)
// MonitorService collects metrics from, and runs commands on, remote
// servers over SSH.
type MonitorService struct {
// sshPool supplies the SSH connections used for every remote command.
sshPool *ssh.Pool
}
// NewMonitorService returns a MonitorService that obtains its SSH
// connections from sshPool.
func NewMonitorService(sshPool *ssh.Pool) *MonitorService {
	svc := new(MonitorService)
	svc.sshPool = sshPool
	return svc
}
// GetServerStats connects to host:port through the SSH pool and samples CPU,
// memory and disk usage.
//
// Each metric is collected in its own SSH session. An ssh.Session accepts
// only a single Run/Start/Output/CombinedOutput call, so sharing one session
// across the three probes (as the previous version did) makes every probe
// after the first fail.
//
// The returned sample has CreatedAt set to the collection time; ServerID is
// left zero for the caller to fill in. The first error encountered is
// returned and no partial sample is produced.
func (s *MonitorService) GetServerStats(host string, port int) (*ServerStats, error) {
	conn, err := s.sshPool.Get(host, port)
	if err != nil {
		return nil, err
	}

	// probe runs one collector inside a fresh session and always closes it.
	probe := func(collect func(*ssh.Session) (float64, error)) (float64, error) {
		session, err := conn.NewSession()
		if err != nil {
			return 0, err
		}
		defer session.Close()
		return collect(session)
	}

	cpuUsage, err := probe(s.getCPUUsage)
	if err != nil {
		return nil, err
	}

	memUsage, err := probe(s.getMemoryUsage)
	if err != nil {
		return nil, err
	}

	diskUsage, err := probe(s.getDiskUsage)
	if err != nil {
		return nil, err
	}

	return &ServerStats{
		CPUUsage:    cpuUsage,
		MemoryUsage: memUsage,
		DiskUsage:   diskUsage,
		CreatedAt:   time.Now(),
	}, nil
}
// getCPUUsage runs a one-line shell pipeline on session that reads the
// aggregate "cpu " line of /proc/stat and prints
// ($2+$4)*100/($2+$4+$5), then parses that output as a float.
// NOTE(review): per proc(5) those columns are user, system and idle
// counters accumulated since boot, so this is an average since boot rather
// than an instantaneous reading — confirm that is what is wanted.
func (s *MonitorService) getCPUUsage(session *ssh.Session) (float64, error) {
	const cmd = "grep 'cpu ' /proc/stat | awk '{usage=($2+$4)*100/($2+$4+$5)} END {print usage}'"

	raw, err := session.Output(cmd)
	if err != nil {
		return 0, err
	}
	text := strings.TrimSpace(string(raw))
	return strconv.ParseFloat(text, 64)
}
// getMemoryUsage runs `free` on session, takes the line containing "Mem",
// prints column 3 divided by column 2 times 100, and parses the result as a
// float.
func (s *MonitorService) getMemoryUsage(session *ssh.Session) (float64, error) {
	const cmd = "free | grep Mem | awk '{print $3/$2 * 100.0}'"

	raw, err := session.Output(cmd)
	if err != nil {
		return 0, err
	}
	text := strings.TrimSpace(string(raw))
	return strconv.ParseFloat(text, 64)
}
// getDiskUsage runs `df /` on session, takes column 5 of the second output
// line with the "%" sign stripped, and parses it as a float.
func (s *MonitorService) getDiskUsage(session *ssh.Session) (float64, error) {
	const cmd = "df / | awk 'NR==2 {print $5}' | sed 's/%//'"

	raw, err := session.Output(cmd)
	if err != nil {
		return 0, err
	}
	text := strings.TrimSpace(string(raw))
	return strconv.ParseFloat(text, 64)
}
// ExecuteBatchCommand runs command on every host in hosts, one after the
// other, connecting to each on the given port.
//
// The result maps each host to the command's combined stdout/stderr, or to
// a failure message when connecting, opening a session, or running the
// command fails. A failure on one host does not stop the remaining hosts,
// and the returned error is always nil.
func (s *MonitorService) ExecuteBatchCommand(hosts []string, port int, command string) (map[string]string, error) {
	results := make(map[string]string)
	for i := range hosts {
		target := hosts[i]
		results[target] = s.runOnHost(target, port, command)
	}
	return results, nil
}

// runOnHost executes command on a single host and returns either its
// combined output or a message describing the step that failed.
func (s *MonitorService) runOnHost(host string, port int, command string) string {
	conn, err := s.sshPool.Get(host, port)
	if err != nil {
		return fmt.Sprintf("连接失败: %v", err)
	}

	session, err := conn.NewSession()
	if err != nil {
		return fmt.Sprintf("创建会话失败: %v", err)
	}
	defer session.Close()

	out, runErr := session.CombinedOutput(command)
	if runErr != nil {
		return fmt.Sprintf("执行失败: %v", runErr)
	}
	return string(out)
}
3. Web API处理器
go
// internal/handler/server.go
package handler
import (
"net/http"
"strconv"
"github.com/gin-gonic/gin"
"github.com/your-username/ops-platform/internal/model"
"github.com/your-username/ops-platform/internal/service"
)
// ServerHandler groups the HTTP handlers for server resources.
type ServerHandler struct {
// serverService is used to create and look up server records.
serverService *service.ServerService
// monitorService is used to collect stats and run remote commands.
monitorService *service.MonitorService
}
// NewServerHandler wires a ServerHandler to the two services it delegates to.
func NewServerHandler(serverService *service.ServerService, monitorService *service.MonitorService) *ServerHandler {
	h := new(ServerHandler)
	h.serverService = serverService
	h.monitorService = monitorService
	return h
}
// CreateServer decodes a JSON server definition from the request body and
// stores it through the server service.
//
// Responses: 400 with the binding error when the body cannot be decoded,
// 500 with the service error when creation fails, otherwise 201 with the
// server value as it stands after Create returns.
func (h *ServerHandler) CreateServer(c *gin.Context) {
	server := model.Server{}

	bindErr := c.ShouldBindJSON(&server)
	if bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	createErr := h.serverService.Create(&server)
	if createErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": createErr.Error()})
		return
	}

	c.JSON(http.StatusCreated, server)
}
// GetServerStats returns a fresh usage sample for the server named by the
// ":id" path parameter.
//
// The ID is parsed as an unsigned integer, so negative or non-numeric
// values are rejected with 400 instead of being wrapped into a huge uint by
// a signed-to-unsigned conversion.
//
// Responses: 400 for an invalid ID, 404 when the server lookup fails, 500
// when collecting the stats fails, otherwise 200 with the sample.
func (h *ServerHandler) GetServerStats(c *gin.Context) {
	// bitSize 0 limits the value to the range of uint.
	id, err := strconv.ParseUint(c.Param("id"), 10, 0)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "无效的服务器ID"})
		return
	}

	server, err := h.serverService.GetByID(uint(id))
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "服务器不存在"})
		return
	}

	stats, err := h.monitorService.GetServerStats(server.IP, server.Port)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	c.JSON(http.StatusOK, stats)
}
// BatchCommand runs one shell command on a set of servers selected by ID
// and responds with a map from server IP to command output (or failure
// message).
//
// Each server is contacted on its own stored SSH port; the previous version
// always used port 22 and therefore could not reach servers registered with
// a different port. Servers are grouped by port because the monitor service
// takes a single port per call.
//
// NOTE(review): the command comes straight from the request body and is
// executed remotely as-is. This endpoint must sit behind authentication and
// authorisation.
//
// Responses: 400 for an undecodable body or when server_ids/command is
// empty, 500 when the server lookup or execution call fails, otherwise 200.
func (h *ServerHandler) BatchCommand(c *gin.Context) {
	var request struct {
		ServerIDs []uint `json:"server_ids"`
		Command   string `json:"command"`
	}
	if err := c.ShouldBindJSON(&request); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	// Reject empty input up front. How GetByIDs treats an empty ID list is
	// not visible here, and an unfiltered lookup would run the command on
	// every server.
	if len(request.ServerIDs) == 0 || request.Command == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "server_ids 和 command 不能为空"})
		return
	}

	servers, err := h.serverService.GetByIDs(request.ServerIDs)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	const defaultSSHPort = 22
	hostsByPort := make(map[int][]string)
	for _, server := range servers {
		port := server.Port
		if port == 0 {
			// Records without a stored port fall back to the SSH default.
			port = defaultSSHPort
		}
		hostsByPort[port] = append(hostsByPort[port], server.IP)
	}

	results := make(map[string]string, len(servers))
	for port, hosts := range hostsByPort {
		partial, err := h.monitorService.ExecuteBatchCommand(hosts, port, request.Command)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		for host, output := range partial {
			results[host] = output
		}
	}

	c.JSON(http.StatusOK, results)
}
4. 定时任务调度器
go
// pkg/cron/scheduler.go
package cron
import (
"log"
"time"
"github.com/robfig/cron/v3"
"github.com/your-username/ops-platform/internal/service"
)
// Scheduler runs the platform's periodic background jobs.
type Scheduler struct {
// cron is the underlying job runner.
cron *cron.Cron
// monitorService collects, stores and prunes server stats.
monitorService *service.MonitorService
// serverService supplies the list of servers to poll.
serverService *service.ServerService
}
// NewScheduler creates a Scheduler with a fresh cron runner. No jobs are
// registered and nothing runs until Start is called.
func NewScheduler(monitorService *service.MonitorService, serverService *service.ServerService) *Scheduler {
	sched := &Scheduler{cron: cron.New()}
	sched.monitorService = monitorService
	sched.serverService = serverService
	return sched
}
// Start registers the periodic jobs and starts the cron runner.
//
// Registration errors (for example an invalid schedule expression) are
// logged instead of being silently dropped; the remaining jobs are still
// registered and the runner is still started.
func (s *Scheduler) Start() {
	jobs := []struct {
		spec string
		run  func()
	}{
		// Collect server stats every 5 minutes.
		{spec: "*/5 * * * *", run: s.collectServerStats},
		// Purge old data every day at 02:00.
		{spec: "0 2 * * *", run: s.cleanupOldData},
	}

	for _, job := range jobs {
		if _, err := s.cron.AddFunc(job.spec, job.run); err != nil {
			log.Printf("注册定时任务 %q 失败: %v", job.spec, err)
		}
	}

	s.cron.Start()
	log.Println("定时任务调度器已启动")
}
// Stop halts the cron runner and logs that the scheduler has stopped.
// The value returned by the runner's Stop is discarded, so this does not
// wait for jobs that are still executing.
func (s *Scheduler) Stop() {
	_ = s.cron.Stop()
	log.Println("定时任务调度器已停止")
}
// collectServerStats samples every known server and stores the results.
//
// One goroutine is started per server and the function returns without
// waiting for them; failures are logged per server and do not affect the
// others.
//
// The closure captures a per-iteration copy of the loop variable rather
// than taking a parameter of type model.Server: this file does not import
// the model package, so naming that type here does not compile.
func (s *Scheduler) collectServerStats() {
	log.Println("开始收集服务器状态...")

	servers, err := s.serverService.GetAll()
	if err != nil {
		log.Printf("获取服务器列表失败: %v", err)
		return
	}

	for _, server := range servers {
		server := server // per-iteration copy for the goroutine (needed before Go 1.22)
		go func() {
			stats, err := s.monitorService.GetServerStats(server.IP, server.Port)
			if err != nil {
				log.Printf("收集服务器 %s 状态失败: %v", server.Name, err)
				return
			}

			stats.ServerID = server.ID
			if err := s.monitorService.SaveStats(stats); err != nil {
				log.Printf("保存服务器状态失败: %v", err)
			}
		}()
	}
}
// cleanupOldData asks the monitor service to remove stats older than one
// calendar month before now. A failure is logged and otherwise ignored.
func (s *Scheduler) cleanupOldData() {
	log.Println("开始清理旧数据...")

	// Keep one month of data.
	cutoff := time.Now().AddDate(0, -1, 0)

	err := s.monitorService.CleanupOldStats(cutoff)
	if err == nil {
		return
	}
	log.Printf("清理旧数据失败: %v", err)
}
5. 主程序入口
go
// cmd/server/main.go
package main
import (
	"context"
	"fmt"
	"log"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/your-username/ops-platform/internal/handler"
	"github.com/your-username/ops-platform/internal/repository"
	"github.com/your-username/ops-platform/internal/service"
	"github.com/your-username/ops-platform/pkg/config"
	"github.com/your-username/ops-platform/pkg/cron"
	"github.com/your-username/ops-platform/pkg/database"
	"github.com/your-username/ops-platform/pkg/ssh"
)
// main wires the application together: configuration, database, SSH pool,
// services, HTTP handlers and the background scheduler. It then serves HTTP
// until SIGINT or SIGTERM arrives.
//
// On a signal the HTTP server is shut down gracefully (in-flight requests
// get up to 10 seconds to finish) before the deferred scheduler stop runs.
// The previous version only logged a message and returned, cutting off
// active requests.
func main() {
	// Load configuration.
	cfg := config.Load()

	// Initialise the database.
	db, err := database.Init(&cfg.Database)
	if err != nil {
		log.Fatalf("数据库初始化失败: %v", err)
	}

	// Initialise the SSH connection pool.
	sshPool, err := ssh.NewPool(cfg.SSH.PrivateKeyPath, 10*time.Minute)
	if err != nil {
		log.Fatalf("SSH连接池初始化失败: %v", err)
	}

	// Repository layer.
	serverRepo := repository.NewServerRepository(db)

	// Service layer.
	serverService := service.NewServerService(serverRepo)
	monitorService := service.NewMonitorService(sshPool)

	// HTTP handlers.
	serverHandler := handler.NewServerHandler(serverService, monitorService)

	// Background jobs.
	scheduler := cron.NewScheduler(monitorService, serverService)
	scheduler.Start()
	defer scheduler.Stop()

	// Routes.
	router := gin.Default()
	api := router.Group("/api/v1")
	{
		servers := api.Group("/servers")
		{
			servers.POST("", serverHandler.CreateServer)
			servers.GET("", serverHandler.ListServers)
			servers.GET("/:id", serverHandler.GetServer)
			servers.GET("/:id/stats", serverHandler.GetServerStats)
			servers.POST("/batch-command", serverHandler.BatchCommand)
		}
	}

	// HTTP server. ReadHeaderTimeout bounds how long a client may take to
	// send its request headers, so slow clients cannot hold connections
	// open indefinitely.
	srv := &http.Server{
		Addr:              fmt.Sprintf(":%d", cfg.Server.Port),
		Handler:           router,
		ReadHeaderTimeout: 10 * time.Second,
	}

	go func() {
		log.Printf("服务器启动在端口 %d", cfg.Server.Port)
		// ErrServerClosed is the normal result after Shutdown; anything
		// else means the listener could not start or died unexpectedly.
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Fatalf("服务器启动失败: %v", err)
		}
	}()

	// Wait for an interrupt or termination signal.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit
	log.Println("正在关闭服务器...")

	// Stop accepting new connections and let in-flight requests finish.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := srv.Shutdown(ctx); err != nil {
		log.Printf("服务器关闭失败: %v", err)
	}
}
项目特色功能
1. 实时WebSocket监控
go
// internal/handler/websocket.go
package handler
import (
"encoding/json"
"time"
"github.com/gin-gonic/gin"
"github.com/gorilla/websocket"
)
// upgrader turns HTTP requests into WebSocket connections.
//
// NOTE(review): CheckOrigin accepts every origin, so any web page can open
// this socket from a visitor's browser (cross-site WebSocket hijacking).
// Restrict it to the platform's own origins before production use.
var upgrader = websocket.Upgrader{
	CheckOrigin: func(*http.Request) bool { return true },
}
// MonitorWebSocket upgrades the request to a WebSocket and pushes a fresh
// stats sample for the server named by the ":id" path parameter every five
// seconds, as a JSON text message.
//
// Changes from the previous version:
//   - an unparsable ID is reported to the client instead of being treated
//     as ID 0;
//   - a failed write ends the handler, so polling over SSH stops once the
//     client has gone away instead of continuing forever;
//   - a JSON encoding failure is reported instead of sending empty data.
//
// NOTE(review): nothing reads from the socket, so a client disconnect is
// only noticed on the next failed write.
func (h *ServerHandler) MonitorWebSocket(c *gin.Context) {
	conn, err := upgrader.Upgrade(c.Writer, c.Request, nil)
	if err != nil {
		// Upgrade has already written the HTTP error response.
		return
	}
	defer conn.Close()

	serverID, err := strconv.ParseUint(c.Param("id"), 10, 0)
	if err != nil {
		// Best-effort notification; the connection is closed right after.
		_ = conn.WriteJSON(gin.H{"error": "无效的服务器ID"})
		return
	}

	server, err := h.serverService.GetByID(uint(serverID))
	if err != nil {
		_ = conn.WriteJSON(gin.H{"error": "服务器不存在"})
		return
	}

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	for range ticker.C {
		stats, err := h.monitorService.GetServerStats(server.IP, server.Port)
		if err != nil {
			// Report the collection failure and retry on the next tick,
			// unless the socket itself is no longer writable.
			if writeErr := conn.WriteJSON(gin.H{"error": err.Error()}); writeErr != nil {
				return
			}
			continue
		}

		data, err := json.Marshal(stats)
		if err != nil {
			if writeErr := conn.WriteJSON(gin.H{"error": err.Error()}); writeErr != nil {
				return
			}
			continue
		}

		if err := conn.WriteMessage(websocket.TextMessage, data); err != nil {
			return
		}
	}
}
2. 配置文件示例
yaml
# config.yaml
# Example configuration. Nested keys must be indented under their section;
# without the indentation the file is not valid YAML (duplicate top-level
# "port" keys) and the sections would decode as empty.

# HTTP listener settings (server.port defaults to 8080 when omitted).
server:
  port: 8080

# Database connection parameters.
# NOTE(review): avoid committing a real password; supply it through a
# secret store or an environment-specific file instead.
database:
  host: localhost
  port: 5432
  user: opsuser
  password: opspassword
  dbname: ops_platform

# Outbound SSH settings (ssh.timeout defaults to 30 when omitted).
ssh:
  private_key_path: /path/to/private/key
  timeout: 30

# NOTE(review): the Config struct shown in this document has no field for
# "logging", so these keys are not decoded by it — confirm where they are
# consumed.
logging:
  level: info
  file: /var/log/ops-platform.log
总结
这个Golang运维平台实战项目展示了:
- 现代化架构设计:清晰的分层架构,模块化设计
- 高性能连接管理:SSH连接池避免频繁建立连接的开销
- 实时监控能力:支持WebSocket实时数据推送
- 批量操作支持:高效的批量命令执行机制
- 可扩展性:易于添加新的监控指标和运维功能
通过具体的代码实现,展示了如何用Golang构建一个功能完整、性能优异的运维管理平台,为实际生产环境提供了可靠的技术基础。