Go 实现分布式任务调度系统

  • Master 服务器(任务管理、负载均衡、任务队列、日志、监控)
  • Worker 服务器(任务执行、gRPC 交互)
  • Prometheus 监控
  • 日志系统

使用 Gin 实现 REST API,gRPC 实现 Master-Worker 交互,并添加 Prometheus 监控


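💡 项目结构

taskschedu/(模块名,对应代码中的 import 路径)
├── proto/task.proto                 # gRPC 通信协议定义
├── task/                            # protoc 生成的代码(task.pb.go、task_grpc.pb.go)
├── master/
│   ├── main.go                      # Master 入口(REST API)
│   ├── schedutask/schedutask.go     # 任务队列与调度
│   ├── workerpool/workpool.go       # 负载均衡与任务分发
│   └── monitor/monitor.go           # Prometheus 监控
└── worker/main.go                   # Worker 入口(gRPC 服务)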


📌 第一步:定义 gRPC 通信协议

文件:proto/task.proto

protobuf 复制代码
syntax = "proto3";
package task;
option go_package = "./task;task";

// Worker is the gRPC service exposed by worker nodes and called by the master.
service Worker {
  // ExecuteTask takes a single TaskRequest and returns a single TaskResponse (unary RPC).
  rpc ExecuteTask (TaskRequest) returns (TaskResponse);
}

// TaskRequest describes one task sent to a worker.
message TaskRequest {
  // Identifier of the task; the master fills this with the task's ID.
  string task_id = 1;
  // Command text associated with the task.
  string command = 2;
}

// TaskResponse is the worker's reply for one task.
message TaskResponse {
  // Identifier of the task this response belongs to.
  string task_id = 1;
  // Result text produced for the task.
  string result = 2;
}

然后在项目根目录运行:

sh 复制代码
protoc --go_out=. --go-grpc_out=. proto/task.proto

生成 task/task.pb.go 和 task/task_grpc.pb.go


📌 第二步:实现 Master 服务器

文件:master/main.go

go 复制代码
package main

import (
	"log"
	"taskschedu/master/monitor"
	"taskschedu/master/schedutask"

	"github.com/gin-gonic/gin"
)

// retryCount is the maximum number of dispatch attempts the scheduler makes
// for each task.
var retryCount = 3

// main starts the master: it registers the REST routes, launches the task
// scheduler and the metrics server in background goroutines, and then serves
// the REST API on port 8080.
func main() {
	r := gin.Default()
	r.POST("/task", schedutask.SubmitTask)
	r.GET("/task/:id", schedutask.GetTask)
	go schedutask.ScheduleTask(retryCount)
	go monitor.StartMonitor()
	log.Println("Master started on port 8080")
	// Run blocks while serving and returns an error if the server cannot
	// start or stops; surface it instead of exiting silently.
	if err := r.Run(":8080"); err != nil {
		log.Fatalf("master server stopped: %v", err)
	}
}

📌 任务队列 & 负载均衡

文件:master/schedutask/schedutask.go

go 复制代码
package schedutask

import (
	"log"
	"net/http"
	"sync"

	"taskschedu/master/monitor"
	"taskschedu/master/workerpool"

	"github.com/gin-gonic/gin"
	"github.com/google/uuid"
)

// Task is one unit of work tracked by the master.
//
// ID and Command are set when the task is submitted. Status is one of
// "pending", "completed" or "failed", and Result carries the text reported
// once execution has finished.
type Task struct {
	ID, Command    string
	Status, Result string
}

// tasks is the buffered queue (capacity 100) of submitted tasks; SubmitTask
// sends to it and ScheduleTask receives from it.
var tasks = make(chan Task, 100)
// taskStore maps task ID to Task; ExecuteTaskWithRetry stores the final state
// of each task there and GetTask reads from it.
var taskStore sync.Map

// ScheduleTask drains the task queue, starting one goroutine per received
// task that runs ExecuteTaskWithRetry with the given retryCount. It returns
// only when the queue channel is closed.
func ScheduleTask(retryCount int) {
	for {
		next, open := <-tasks
		if !open {
			return
		}
		go ExecuteTaskWithRetry(next, retryCount)
	}
}

// ExecuteTaskWithRetry dispatches task to a worker, trying up to retryCount
// times. On the first successful attempt the task is stored as "completed"
// with the worker's result; if every attempt fails it is stored as "failed".
// Each outcome is also reported to the monitor package.
//
// Every failed attempt is logged together with its error so that dispatch
// problems (unreachable worker, timeout, ...) can be diagnosed.
func ExecuteTaskWithRetry(task Task, retryCount int) {
	var lastErr error
	for attempt := 1; attempt <= retryCount; attempt++ {
		log.Printf("Executing task: %v with retry %d", task.ID, attempt)
		result, err := workerpool.DispatchTaskToWorker(task.ID, task.Command)
		if err != nil {
			lastErr = err
			log.Printf("Task %v attempt %d/%d failed: %v", task.ID, attempt, retryCount, err)
			continue
		}
		log.Printf("Task %v Successfully executed with result: %v", task.ID, result)
		task.Status = "completed"
		task.Result = result
		taskStore.Store(task.ID, task)
		monitor.TrackTaskStatus("completed")
		return
	}
	// All attempts were used up (or retryCount was not positive, in which
	// case lastErr is nil).
	log.Printf("Task %v failed after %d retries, last error: %v", task.ID, retryCount, lastErr)
	task.Status = "failed"
	task.Result = "Task failed after retries"
	taskStore.Store(task.ID, task)
	monitor.TrackTaskStatus("failed")
}

// SubmitTask handles POST /task. It expects a JSON body with a "command"
// field, creates a pending Task with a fresh UUID, records it in taskStore
// and places it on the task queue.
//
// Responses:
//   - 400 with the binding error when the body is not valid JSON.
//   - 503 when the task queue is full.
//   - 200 with the new task_id when the task was queued.
func SubmitTask(c *gin.Context) {
	var req struct {
		Command string `json:"command"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	task := Task{
		ID:      uuid.New().String(),
		Command: req.Command,
		Status:  "pending",
		Result:  "",
	}
	// Store the pending task BEFORE enqueueing it: GetTask can then report
	// "pending" right away, and the executor's later Store of the final
	// state can never be overwritten by this one.
	taskStore.Store(task.ID, task)
	select {
	case tasks <- task:
	default:
		// The queue is bounded; reject instead of blocking the HTTP handler
		// until a slot frees up.
		taskStore.Delete(task.ID)
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "Task queue is full"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"message": "Task submitted successfully", "task_id": task.ID})
}

// GetTask handles GET /task/:id. It looks the task up in taskStore by the
// "id" path parameter and answers 200 with the stored task, or 404 with an
// error message when no task is stored under that ID.
func GetTask(c *gin.Context) {
	stored, found := taskStore.Load(c.Param("id"))
	if found {
		c.JSON(http.StatusOK, stored)
		return
	}
	c.JSON(http.StatusNotFound, gin.H{"error": "Task not found"})
}

📌 负载均衡

文件:master/workerpool/workpool.go

go 复制代码
package workerpool

import (
	"context"
	"sync"
	"taskschedu/task"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

// workerNodes lists the gRPC addresses of the available workers.
var workerNodes = []string{"127.0.0.1:50051"}

// currentNodeIndex is the index of the most recently selected worker node.
var currentNodeIndex = 0

// mu guards currentNodeIndex and the read of workerNodes in GetNextNode.
var mu sync.Mutex

// GetNextNode returns the address of the next worker in round-robin order:
// it advances the shared index by one (wrapping around) and returns the node
// at the new index. It is safe for concurrent use.
//
// When no worker nodes are configured it returns the empty string instead of
// panicking on a modulo-by-zero.
func GetNextNode() string {
	mu.Lock()
	defer mu.Unlock()
	if len(workerNodes) == 0 {
		return ""
	}
	currentNodeIndex = (currentNodeIndex + 1) % len(workerNodes)
	return workerNodes[currentNodeIndex]
}

// DispatchTaskToWorker sends one task to the worker chosen by GetNextNode and
// returns the result text from the worker's response.
//
// A new client connection (without transport security) is created for each
// call and closed before returning. The RPC is bounded by a 10-second
// timeout. Any error from creating the client or from the RPC is returned
// unchanged, with an empty result.
func DispatchTaskToWorker(taskID string, command string) (string, error) {
	target := GetNextNode()
	dialOpt := grpc.WithTransportCredentials(insecure.NewCredentials())
	conn, dialErr := grpc.NewClient(target, dialOpt)
	if dialErr != nil {
		return "", dialErr
	}
	defer conn.Close()

	rpcCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	req := &task.TaskRequest{TaskId: taskID, Command: command}
	resp, rpcErr := task.NewWorkerClient(conn).ExecuteTask(rpcCtx, req)
	if rpcErr != nil {
		return "", rpcErr
	}
	return resp.Result, nil
}

📌 监控

文件:master/monitor/monitor.go

go 复制代码
package monitor

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// taskCounter counts processed tasks, partitioned by the "status" label.
// TrackTaskStatus increments it with the status value it is given.
var taskCounter = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "task_scheduler",
		Subsystem: "master",
		Name:      "task_counter",
		Help:      "Total number of tasks processed",
	},
	[]string{"status"},
)

// init registers taskCounter with Prometheus when the package is loaded.
func init() {
	prometheus.MustRegister(taskCounter)
}

// TrackTaskStatus adds one to the task counter series whose "status" label
// equals status.
func TrackTaskStatus(status string) {
	series := taskCounter.WithLabelValues(status)
	series.Inc()
}

// StartMonitor serves the Prometheus metrics endpoint at /metrics on port
// 8081. It blocks while serving and terminates the process via log.Fatal if
// the server fails, so callers run it in its own goroutine.
//
// The handler is registered on a dedicated ServeMux rather than on
// http.DefaultServeMux: the metrics port then exposes only /metrics, and
// calling StartMonitor again does not panic on a duplicate pattern
// registration.
func StartMonitor() {
	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.Handler())
	log.Println("Metrics server started on port 8081")
	log.Fatal(http.ListenAndServe(":8081", mux))
}

📌 第三步:实现 Worker

文件:worker/main.go

go 复制代码
package main

import (
	"context"
	"log"
	"net"
	"taskschedu/task"

	"google.golang.org/grpc"
)

// Worker implements the Worker gRPC service. It embeds
// task.UnimplementedWorkerServer from the generated code.
type Worker struct {
	task.UnimplementedWorkerServer
}

// ExecuteTask handles one task request from the master. It currently only
// logs the command and task ID and replies with a fixed success message that
// echoes the request's task ID; it never returns an error.
//
// The real command execution is kept below as commented-out code.
func (w *Worker) ExecuteTask(ctx context.Context, req *task.TaskRequest) (*task.TaskResponse, error) {
	// cmd := exec.Command(req.Command)
	// output, err := cmd.Output()
	// if err != nil {
	// 	return nil, status.Errorf(codes.Internal, "failed to execute task: %v", err)
	// }
	log.Printf("Executing task: %v with id: %v", req.Command, req.TaskId)

	resp := &task.TaskResponse{}
	resp.TaskId = req.TaskId
	// With real execution enabled this would be string(output).
	resp.Result = "Task executed successfully"
	return resp, nil
}

// main starts the worker: it listens on TCP port 50051, registers the Worker
// service on a new gRPC server and serves until the server stops. Failure to
// listen or to serve terminates the process.
func main() {
	listener, err := net.Listen("tcp", ":50051")
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	server := grpc.NewServer()
	task.RegisterWorkerServer(server, new(Worker))
	log.Printf("server listening at %v", listener.Addr())

	serveErr := server.Serve(listener)
	if serveErr != nil {
		log.Fatalf("failed to serve: %v", serveErr)
	}
}

📌 运行(两条命令都会阻塞,需分别在两个终端中启动 Master 和 Worker)

sh 复制代码
go run master/main.go
go run worker/main.go

完成 Master 和 Worker 整合

支持任务队列、负载均衡、监控

完整的 gRPC 和 REST API

相关推荐
Vespeng2 小时前
Go 项目实战:全局异常处理
go
喵个咪5 小时前
开箱即用的GO后台管理系统 Kratos Admin - 如何进行Docker部署后端
后端·微服务·go
HappyChan9 小时前
Prometheus 存储结构浅析
云原生·go·监控
陈明勇1 天前
chromem-go:Go 语言 RAG 应用的高效轻量级向量数据库
后端·go
upsilon1 天前
golang-开发中获取变量类型的方法
后端·go
程序员爱钓鱼1 天前
从零到精通:用go+vue语言打造高效多语言博客系统的完整指南
后端·go·vuex
一个热爱生活的普通人1 天前
Gin 参数校验:从基础到自定义规则
后端·go·gin
江湖十年1 天前
在 Go 中使用 cron 执行定时任务
后端·面试·go
Vespeng2 天前
Go 项目实战:如何优雅的处理日志
go