- Master 服务器(任务管理、负载均衡、任务队列、日志、监控)
- Worker 服务器(任务执行、gRPC 交互)
- Prometheus 监控
- 日志系统
使用 Gin 实现 REST API,使用 gRPC 实现 Master-Worker 交互,并添加 Prometheus 监控。
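💡 项目结构(Go 模块名:taskschedu)
.
├── proto/task.proto
├── task/(由 protoc 生成:task.pb.go、task_grpc.pb.go)
├── master/
│   ├── main.go
│   ├── schedutask/schedutask.go
│   ├── workerpool/workpool.go
│   └── monitor/monitor.go
└── worker/main.go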
📌 第一步:定义 gRPC 通信协议
文件:proto/task.proto
protobuf
syntax = "proto3";
package task;
option go_package = "./task;task";
// Worker is the gRPC service exposed for task execution.
service Worker {
// ExecuteTask accepts a TaskRequest and returns a TaskResponse.
rpc ExecuteTask (TaskRequest) returns (TaskResponse);
}
// TaskRequest is the input of Worker.ExecuteTask: a task identifier plus the
// command string associated with that task.
message TaskRequest {
string task_id = 1;
string command = 2;
}
// TaskResponse is the output of Worker.ExecuteTask: the task identifier plus a
// result string.
message TaskResponse {
string task_id = 1;
string result = 2;
}
然后在项目根目录运行:
sh
protoc --go_out=. --go-grpc_out=. proto/task.proto
生成 task/task.pb.go 和 task/task_grpc.pb.go。
📌 第二步:实现 Master 服务器
文件:master/main.go
go
package main
import (
"log"
"taskschedu/master/monitor"
"taskschedu/master/schedutask"
"github.com/gin-gonic/gin"
)
// retryCount is the value main passes to schedutask.ScheduleTask.
var retryCount = 3
// main wires up the Master process: it registers the REST routes, starts the
// scheduling loop and the metrics server in background goroutines, and then
// serves HTTP on port 8080.
func main() {
	r := gin.Default()
	r.POST("/task", schedutask.SubmitTask)
	r.GET("/task/:id", schedutask.GetTask)

	go schedutask.ScheduleTask(retryCount)
	go monitor.StartMonitor()

	log.Println("Master started on port 8080")
	// Run blocks while serving and only returns on failure (for example when
	// the port is already in use); report that error instead of exiting
	// silently with status 0.
	if err := r.Run(":8080"); err != nil {
		log.Fatalf("master server stopped: %v", err)
	}
}
📌 任务队列 & 负载均衡
文件:master/schedutask/schedutask.go
go
package schedutask
import (
"log"
"net/http"
"sync"
"taskschedu/master/monitor"
"taskschedu/master/workerpool"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// Task is the record kept for one submitted command.
type Task struct {
	// ID identifies the task; SubmitTask fills it with a generated UUID string.
	ID string
	// Command is the command text taken from the submit request.
	Command string
	// Status is one of "pending", "completed" or "failed".
	Status string
	// Result holds the worker's result on success or a failure message.
	Result string
}
// tasks is the buffered queue (capacity 100) that SubmitTask sends to and
// ScheduleTask receives from.
var tasks = make(chan Task, 100)
// taskStore maps a task ID to the most recently stored Task value; GetTask
// looks tasks up here.
var taskStore sync.Map
// ScheduleTask drains the task queue, starting one goroutine per received task
// that runs ExecuteTaskWithRetry with the given retryCount. It returns only
// once the tasks channel has been closed.
func ScheduleTask(retryCount int) {
	for {
		next, open := <-tasks
		if !open {
			return
		}
		go ExecuteTaskWithRetry(next, retryCount)
	}
}
// ExecuteTaskWithRetry dispatches task to a worker, making at most retryCount
// attempts. On the first successful attempt the task is stored as "completed"
// together with the worker's result; if every attempt fails (or retryCount is
// not positive) it is stored as "failed". The final status is also reported to
// the monitor package.
func ExecuteTaskWithRetry(task Task, retryCount int) {
	for attempt := 1; attempt <= retryCount; attempt++ {
		log.Printf("Executing task: %v with retry %d", task.ID, attempt)
		result, err := workerpool.DispatchTaskToWorker(task.ID, task.Command)
		if err != nil {
			// Log the cause of each failed attempt; otherwise the reason a
			// task ends up "failed" is lost.
			log.Printf("Task %v attempt %d failed: %v", task.ID, attempt, err)
			continue
		}

		log.Printf("Task %v Successfully executed with result: %v", task.ID, result)
		task.Status = "completed"
		task.Result = result
		taskStore.Store(task.ID, task)
		monitor.TrackTaskStatus("completed")
		return
	}

	log.Printf("Task %v failed after %d retries", task.ID, retryCount)
	task.Status = "failed"
	task.Result = "Task failed after retries"
	taskStore.Store(task.ID, task)
	monitor.TrackTaskStatus("failed")
}
// SubmitTask handles POST /task. It expects a JSON body with a "command"
// field, creates a pending Task with a freshly generated ID, queues it for
// execution and replies with the task ID. A body that cannot be bound yields
// HTTP 400 with the binding error.
func SubmitTask(c *gin.Context) {
	var req struct {
		Command string `json:"command"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	task := Task{
		ID:      uuid.New().String(),
		Command: req.Command,
		Status:  "pending",
	}

	// Record the pending task before enqueueing it so GetTask can report it
	// while it is still waiting or running. Storing first also guarantees the
	// "pending" entry can never overwrite a later "completed"/"failed" entry.
	taskStore.Store(task.ID, task)
	tasks <- task

	c.JSON(http.StatusOK, gin.H{"message": "Task submitted successfully", "task_id": task.ID})
}
// GetTask handles GET /task/:id. It replies with the stored task as JSON, or
// with HTTP 404 when no task is stored under the requested ID.
func GetTask(c *gin.Context) {
	stored, found := taskStore.Load(c.Param("id"))
	if found {
		c.JSON(http.StatusOK, stored)
		return
	}
	c.JSON(http.StatusNotFound, gin.H{"error": "Task not found"})
}
📌 负载均衡
文件:master/workerpool/workpool.go
go
package workerpool
import (
"context"
"sync"
"taskschedu/task"
"time"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// workerNodes lists the worker addresses that tasks are dispatched to.
var workerNodes = []string{"127.0.0.1:50051"}

// currentNodeIndex is the index into workerNodes of the node returned by the
// most recent GetNextNode call (0 before the first call).
var currentNodeIndex = 0

// mu guards currentNodeIndex.
var mu sync.Mutex

// GetNextNode returns the next worker address in round-robin order: it
// advances currentNodeIndex by one, wrapping at the end of workerNodes, and
// returns the address at the new index. It returns "" when workerNodes is
// empty.
func GetNextNode() string {
	mu.Lock()
	defer mu.Unlock()

	// Without this guard the modulo below panics with an integer divide by
	// zero when no worker nodes are configured.
	if len(workerNodes) == 0 {
		return ""
	}
	currentNodeIndex = (currentNodeIndex + 1) % len(workerNodes)
	return workerNodes[currentNodeIndex]
}
// DispatchTaskToWorker sends one task to the worker chosen by GetNextNode and
// returns the Result field of the worker's response. A new client connection
// (insecure transport credentials) is created for the call and closed on
// return; the RPC runs under a 10-second timeout. Any error from creating the
// client or from the RPC is returned unchanged with an empty result.
func DispatchTaskToWorker(taskID string, command string) (string, error) {
	target := GetNextNode()
	creds := grpc.WithTransportCredentials(insecure.NewCredentials())

	conn, err := grpc.NewClient(target, creds)
	if err != nil {
		return "", err
	}
	defer conn.Close()

	worker := task.NewWorkerClient(conn)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	req := &task.TaskRequest{TaskId: taskID, Command: command}
	resp, err := worker.ExecuteTask(ctx, req)
	if err != nil {
		return "", err
	}
	return resp.Result, nil
}
📌 监控
文件:master/monitor/monitor.go
go
package monitor
import (
"log"
"net/http"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// taskCounter counts processed tasks, partitioned by a single "status" label.
var taskCounter = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "task_scheduler",
		Subsystem: "master",
		Name:      "task_counter",
		Help:      "Total number of tasks processed",
	},
	[]string{"status"},
)
// init registers taskCounter with Prometheus at package load time.
func init() {
	prometheus.MustRegister(taskCounter)
}
// TrackTaskStatus increments taskCounter for the given status label value.
func TrackTaskStatus(status string) {
	counter := taskCounter.WithLabelValues(status)
	counter.Inc()
}
// StartMonitor registers the Prometheus handler at /metrics on the default
// HTTP mux and serves it on port 8081. It blocks while serving and terminates
// the process via log.Fatal when the server stops.
func StartMonitor() {
	const addr = ":8081"

	http.Handle("/metrics", promhttp.Handler())
	log.Println("Metrics server started on port 8081")

	err := http.ListenAndServe(addr, nil)
	log.Fatal(err)
}
📌 第三步:实现 Worker
文件:worker/main.go
go
package main
import (
"context"
"log"
"net"
"taskschedu/task"
"google.golang.org/grpc"
)
// Worker is the gRPC server type registered through task.RegisterWorkerServer.
// It embeds task.UnimplementedWorkerServer.
type Worker struct {
	task.UnimplementedWorkerServer
}
// ExecuteTask logs the received command and task ID and replies with a fixed
// success message that echoes the task ID. The command is not actually run.
func (w *Worker) ExecuteTask(ctx context.Context, req *task.TaskRequest) (*task.TaskResponse, error) {
	// Real command execution is disabled; the sketch is kept for reference:
	//   cmd := exec.Command(req.Command)
	//   output, err := cmd.Output()
	//   if err != nil {
	//       return nil, status.Errorf(codes.Internal, "failed to execute task: %v", err)
	//   }
	//   ... Result: string(output)
	log.Printf("Executing task: %v with id: %v", req.Command, req.TaskId)

	resp := &task.TaskResponse{
		TaskId: req.TaskId,
		Result: "Task executed successfully",
	}
	return resp, nil
}
// main starts the Worker process: it listens on TCP port 50051, registers the
// Worker service on a new gRPC server and serves until the server fails.
// Listen and serve failures terminate the process via log.Fatalf.
func main() {
	listener, err := net.Listen("tcp", ":50051")
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	server := grpc.NewServer()
	task.RegisterWorkerServer(server, &Worker{})
	log.Printf("server listening at %v", listener.Addr())

	err = server.Serve(listener)
	if err != nil {
		log.Fatalf("failed to serve: %v", err)
	}
}
📌 运行(Master 与 Worker 都是常驻进程,请在两个终端中分别启动)
sh
go run master/main.go
go run worker/main.go
✅ 完成 Master 和 Worker 整合
✅ 支持任务队列、负载均衡、监控
✅ 完整的 gRPC 和 REST API