项目综述
- master负责提供api接口,接收命令后通过grpc调用worker服务完成命令。
- master定期查询consul,自动发现worker服务节点,并随worker的注册与注销动态更新节点列表。
- worker负责执行master分配的任务。
- watch服务,通过k8s api观察worker service是否创建,并自动往consul中注册及注销worker service服务,支持通过正则匹配发现用户输入的service name
项目代码
- watch服务
go
package main
import (
"context"
"fmt"
"log"
"os"
"regexp"
"sync"
consulapi "github.com/hashicorp/consul/api"
"github.com/spf13/cobra"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)
var config string
var namespace string
var serviceName string
// main builds the k8sdnsquery root command, binds its flags to the
// package-level config, namespace and serviceName variables, and executes it.
func main() {
	rootCmd := &cobra.Command{Use: "k8sdnsquery"}

	// Abort before doing any work when the kubeconfig path cannot be stat'ed.
	rootCmd.PreRun = func(cmd *cobra.Command, args []string) {
		if _, err := os.Stat(config); err != nil {
			log.Fatalf("配置文件 %s 不存在", config)
		}
	}

	// Create the clientset, start the service watcher in its own goroutine
	// and block until that goroutine reports completion.
	rootCmd.Run = func(cmd *cobra.Command, args []string) {
		clientset := createClient(config)
		fmt.Print("获取clientset成功\n")

		var wg sync.WaitGroup
		wg.Add(1)
		go watchServices(clientset, namespace, serviceName, &wg)
		wg.Wait()
	}

	// Flags: kubeconfig path, namespace, and the regular expression that is
	// matched against service names.
	flags := rootCmd.Flags()
	flags.StringVarP(&config, "config", "c", "/root/.kube/config", "config file")
	flags.StringVarP(&namespace, "namespace", "n", "default", "namespace")
	flags.StringVarP(&serviceName, "service", "s", "", "service name")

	if err := rootCmd.Execute(); err != nil {
		log.Fatalf("Failed to execute root command: %v", err)
	}
}
// createClient loads the kubeconfig file at path config and returns a
// Kubernetes clientset built from it. Any failure terminates the process via
// log.Fatalf.
func createClient(config string) *kubernetes.Clientset {
	// Parse the kubeconfig file into a REST configuration.
	restCfg, buildErr := clientcmd.BuildConfigFromFlags("", config)
	if buildErr != nil {
		log.Fatalf("Failed to build config: %v", buildErr)
	}

	// Build the typed clientset from that configuration.
	cs, clientErr := kubernetes.NewForConfig(restCfg)
	if clientErr != nil {
		log.Fatalf("Failed to create client: %v", clientErr)
	}
	return cs
}
// func getServices(clientset *kubernetes.Clientset, namespace string) []v1.Service {
// services, err := clientset.CoreV1().Services(namespace).List(context.TODO(), metav1.ListOptions{})
// if err != nil {
// log.Fatalf("Failed to get services: %v", err)
// }
// return services.Items
// }
// func getEndpointsFromService(clientset *kubernetes.Clientset, service v1.Service) {
// //获取service的endpoints
// endpoints, err := clientset.CoreV1().Endpoints(service.Namespace).Get(context.TODO(), service.Name, metav1.GetOptions{})
// if err != nil {
// log.Fatalf("Failed to get endpoints: %v", err)
// }
// //获取endpoints的ip和port
// for _, subset := range endpoints.Subsets {
// for _, port := range subset.Ports {
// for _, address := range subset.Addresses {
// fmt.Printf("ip: %s, port: %d, protocol: %s\n", address.IP, port.Port, port.Protocol)
// }
// }
// }
// }
// func watchPods(clientset *kubernetes.Clientset, namespace string) {
// watchInterface, err := clientset.CoreV1().Pods(namespace).Watch(context.TODO(), metav1.ListOptions{})
// if err != nil {
// log.Fatalf("Failed to watch pods: %v", err)
// }
// defer watchInterface.Stop()
// //监听pod事件
// fmt.Printf("开始监听pod事件\n")
// for event := range watchInterface.ResultChan() {
// pod, ok := event.Object.(*v1.Pod)
// if !ok {
// log.Fatalf("event.Object is not a Pod")
// }
// fmt.Printf("pod: %s, ip: %s, phase: %s\n", pod.Name, pod.Status.PodIP, pod.Status.Phase)
// }
// }
// watchServices watches Service events in namespace and mirrors every Service
// whose name matches the regular expression name into Consul: an ADDED event
// registers the Service (using its ClusterIP and first port), a DELETED event
// deregisters it. Other event types are only printed.
//
// wg.Done is called when the function returns, which happens once the watch
// result channel is closed. An invalid pattern, a failure to open the watch,
// or an event that does not carry a Service terminates the process via
// log.Fatalf.
func watchServices(clientset *kubernetes.Clientset, namespace string, name string, wg *sync.WaitGroup) {
	// Deferred so the waiter is released on every return path.
	defer wg.Done()

	// name comes from a command-line flag, so report a malformed expression
	// as a normal fatal error instead of panicking.
	pattern, err := regexp.Compile(name)
	if err != nil {
		log.Fatalf("Failed to compile service name pattern %q: %v", name, err)
	}

	watchInterface, err := clientset.CoreV1().Services(namespace).Watch(context.TODO(), metav1.ListOptions{})
	if err != nil {
		log.Fatalf("Failed to watch services: %v", err)
	}
	defer watchInterface.Stop()

	// Consume service events until the watch channel is closed.
	fmt.Printf("开始监听service事件,使用正则表达式: %s\n", name)
	for event := range watchInterface.ResultChan() {
		service, ok := event.Object.(*v1.Service)
		if !ok {
			log.Fatalf("event.Object is not a Service")
		}
		if !pattern.MatchString(service.Name) {
			continue
		}

		// A Service may declare no ports (e.g. some headless or ExternalName
		// Services). There is nothing to register for it, and indexing
		// Ports[0] would panic, so skip the event.
		if len(service.Spec.Ports) == 0 {
			log.Printf("service %s/%s has no ports, ignoring %s event", service.Namespace, service.Name, event.Type)
			continue
		}
		port := int(service.Spec.Ports[0].Port)

		fmt.Printf("service: %s, ip: %s, port: %d,namespace: %s\n", service.Name, service.Spec.ClusterIP, port, service.Namespace)
		fmt.Printf("event.Type: %s\n", event.Type)

		switch event.Type {
		case "ADDED":
			registerService(service.Name, service.Spec.ClusterIP, port, service.Namespace)
		case "DELETED":
			deregisterService(service.Name, service.Namespace)
		}
	}
}
// registerService registers a service with the Consul agent at
// 172.31.6.101:8500. The Consul service ID and name are both
// "<serviceName>-<namespace>", and a TCP health check against
// serviceIP:servicePort (10s interval, 5s timeout) is attached. Any failure
// terminates the process via log.Fatalf.
func registerService(serviceName string, serviceIP string, servicePort int, namespace string) {
	// Build a Consul client pointed at the fixed Consul address.
	consulCfg := consulapi.DefaultConfig()
	consulCfg.Address = "172.31.6.101:8500"

	client, err := consulapi.NewClient(consulCfg)
	if err != nil {
		log.Fatalf("Failed to create consul client: %v", err)
	}

	// ID and Name share the same "<service>-<namespace>" value.
	consulID := serviceName + "-" + namespace
	healthCheck := &consulapi.AgentServiceCheck{
		TCP:      fmt.Sprintf("%s:%d", serviceIP, servicePort),
		Interval: "10s",
		Timeout:  "5s",
		Notes:    "Health check for k8sdnsquery",
	}
	registration := &consulapi.AgentServiceRegistration{
		ID:      consulID,
		Name:    consulID,
		Address: serviceIP,
		Port:    servicePort,
		Tags:    []string{"go", "k8sdnsquery"},
		Check:   healthCheck,
	}

	if err := client.Agent().ServiceRegister(registration); err != nil {
		log.Fatalf("Failed to register service: %v", err)
	}
	fmt.Printf("服务 %s 注册成功\n", serviceName)
}
// deregisterService removes the service with ID "<serviceName>-<namespace>"
// from the Consul agent at 172.31.6.101:8500. Any failure terminates the
// process via log.Fatalf.
func deregisterService(serviceName string, namespace string) {
	// Build a Consul client pointed at the fixed Consul address.
	consulCfg := consulapi.DefaultConfig()
	consulCfg.Address = "172.31.6.101:8500"

	client, err := consulapi.NewClient(consulCfg)
	if err != nil {
		log.Fatalf("Failed to create consul client: %v", err)
	}

	// The ID must match the one used at registration time.
	consulID := serviceName + "-" + namespace
	if err := client.Agent().ServiceDeregister(consulID); err != nil {
		log.Fatalf("Failed to deregister service: %v", err)
	}
	fmt.Printf("服务 %s 注销成功\n", serviceName)
}
- worker服务
grpc通信的实现请参考我之前发布的这篇文章:juejin.cn/post/748275...
go
package main
import (
"context"
"log"
"net"
"taskschedu/task"
"google.golang.org/grpc"
)
// Worker is the gRPC worker service. It embeds
// task.UnimplementedWorkerServer and overrides ExecuteTask.
type Worker struct {
	task.UnimplementedWorkerServer
}

// ExecuteTask logs the received command and task ID and replies with a fixed
// success message carrying the same task ID. The command itself is not
// executed; the exec-based implementation is kept below as a disabled
// reference.
func (w *Worker) ExecuteTask(ctx context.Context, req *task.TaskRequest) (*task.TaskResponse, error) {
	log.Printf("Executing task: %v with id: %v", req.Command, req.TaskId)

	// Disabled real execution:
	// cmd := exec.Command(req.Command)
	// output, err := cmd.Output()
	// if err != nil {
	// 	return nil, status.Errorf(codes.Internal, "failed to execute task: %v", err)
	// }
	// ...and Result would then be string(output).

	resp := &task.TaskResponse{
		TaskId: req.TaskId,
		Result: "Task executed successfully",
	}
	return resp, nil
}
// main listens on TCP port 50051 and serves the Worker gRPC service on it.
// A listen or serve failure terminates the process via log.Fatalf.
func main() {
	listener, err := net.Listen("tcp", ":50051")
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	server := grpc.NewServer()
	task.RegisterWorkerServer(server, &Worker{})

	log.Printf("server listening at %v", listener.Addr())
	serveErr := server.Serve(listener)
	if serveErr != nil {
		log.Fatalf("failed to serve: %v", serveErr)
	}
}
- master服务
go
#worker pool,实现服务发现,自动注册及注销
package workerpool
import (
"context"
"fmt"
"regexp"
"sync"
"taskschedu/task"
"time"
consulapi "github.com/hashicorp/consul/api"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Package-level state shared by the discovery goroutine and the dispatcher.
var (
// workerNodes holds the "address:port" strings built from the most recent
// Consul query in updateWorkerNodes.
workerNodes []string
// currentNodeIndex is the round-robin cursor into workerNodes used by
// GetNextNode.
currentNodeIndex int
// mu guards workerNodes and currentNodeIndex.
mu sync.Mutex
// stopChan is created by StartWorkerDiscovery and closed by
// StopWorkerDiscovery to make discoverWorkers return.
stopChan chan struct{}
)
// 之前直接把worker地址写死在代码中
// var workerNodes = []string{"worker:50051"}
// var currentNodeIndex = 0
// var mu sync.Mutex
// StartWorkerDiscovery starts the background goroutine that periodically
// refreshes the worker list from Consul, keeping the services whose ID
// matches the regular expression servicePattern.
//
// The pattern is validated up front so that a malformed expression is
// returned to the caller as an error instead of only being logged on every
// refresh while discovery silently finds nothing.
func StartWorkerDiscovery(servicePattern string) error {
	if _, err := regexp.Compile(servicePattern); err != nil {
		return fmt.Errorf("invalid worker service pattern %q: %w", servicePattern, err)
	}

	stopChan = make(chan struct{})
	go discoverWorkers(servicePattern)
	return nil
}
// StopWorkerDiscovery signals the discovery goroutine to exit by closing
// stopChan. It does nothing when stopChan has not been created yet.
func StopWorkerDiscovery() {
	if stopChan == nil {
		return
	}
	close(stopChan)
}
// discoverWorkers 定期从Consul发现worker节点
func discoverWorkers(servicePattern string) {
ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()
for {
select {
case <-stopChan:
return
case <-ticker.C:
updateWorkerNodes(servicePattern)
}
}
}
// updateWorkerNodes queries the Consul agent at 172.31.6.101:8500 for its
// services, keeps those whose service ID matches servicePattern, and replaces
// workerNodes with their "address:port" strings. On any error it prints a
// message and leaves the current list untouched.
func updateWorkerNodes(servicePattern string) {
	consulCfg := consulapi.DefaultConfig()
	consulCfg.Address = "172.31.6.101:8500"
	client, err := consulapi.NewClient(consulCfg)
	if err != nil {
		fmt.Printf("创建 consul 客户端失败: %v\n", err)
		return
	}

	// Fetch every service known to the agent.
	registered, err := client.Agent().Services()
	if err != nil {
		fmt.Printf("获取服务列表失败: %v\n", err)
		return
	}

	matcher, err := regexp.Compile(servicePattern)
	if err != nil {
		fmt.Printf("正则表达式编译失败: %v\n", err)
		return
	}

	// Collect the gRPC dial address of each matching service.
	var discovered []string
	for serviceID, svc := range registered {
		if !matcher.MatchString(serviceID) {
			continue
		}
		discovered = append(discovered, fmt.Sprintf("%s:%d", svc.Address, svc.Port))
	}

	// Swap in the new list and reset the cursor if it now points past the end.
	mu.Lock()
	workerNodes = discovered
	if len(workerNodes) <= currentNodeIndex {
		currentNodeIndex = 0
	}
	mu.Unlock()

	fmt.Printf("更新worker节点列表: %v\n", discovered)
}
// GetNextNode advances the round-robin cursor by one (wrapping around) and
// returns the worker address at the new position. It returns "" when the
// worker list is empty.
func GetNextNode() string {
	mu.Lock()
	defer mu.Unlock()

	count := len(workerNodes)
	if count == 0 {
		return ""
	}

	next := (currentNodeIndex + 1) % count
	currentNodeIndex = next
	return workerNodes[next]
}
// DispatchTaskToWorker sends the task to the next worker returned by
// GetNextNode over an insecure gRPC connection and returns the worker's
// result string. The ExecuteTask call is bounded by a 10-second timeout.
//
// It returns an error when no worker is available, when the client cannot be
// created, or when the RPC fails. Underlying errors are wrapped with %w so
// callers can still inspect them with errors.Is / errors.As.
func DispatchTaskToWorker(taskID string, command string) (string, error) {
	node := GetNextNode()
	if node == "" {
		return "", fmt.Errorf("没有可用的worker节点")
	}

	conn, err := grpc.NewClient(node, grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		return "", fmt.Errorf("连接worker失败: %w", err)
	}
	defer conn.Close()

	client := task.NewWorkerClient(conn)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	response, err := client.ExecuteTask(ctx, &task.TaskRequest{
		TaskId:  taskID,
		Command: command,
	})
	if err != nil {
		return "", fmt.Errorf("执行任务失败: %w", err)
	}
	return response.Result, nil
}
go
#任务调度的实现,并实现api接口
package schedutask
import (
"log"
"net/http"
"sync"
"taskschedu/master/monitor"
"taskschedu/master/workerpool"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// Task is one submitted command together with its execution state.
type Task struct {
// ID is the UUID string assigned in SubmitTask.
ID string
// Command is the command text taken from the request body.
Command string
// Status is set to "pending", "completed" or "failed" by this package.
Status string
// Result is the worker's result on success, or a fixed failure message.
Result string
}
// tasks is the queue (buffer of 100) written by SubmitTask and drained by
// ScheduleTask.
var tasks = make(chan Task, 100)
// taskStore holds Task values keyed by task ID; GetTask reads from it.
var taskStore sync.Map
// ScheduleTask drains the tasks channel and starts one goroutine per task
// that runs ExecuteTaskWithRetry with the given retry count. It returns only
// when the channel is closed.
func ScheduleTask(retryCount int) {
	for {
		next, open := <-tasks
		if !open {
			return
		}
		go ExecuteTaskWithRetry(next, retryCount)
	}
}
// ExecuteTaskWithRetry dispatches the task to a worker, trying up to
// retryCount times. On the first success it stores the task as "completed"
// with the worker's result; if every attempt fails (or retryCount is not
// positive) it stores the task as "failed". The outcome is also reported to
// monitor.TrackTaskStatus.
func ExecuteTaskWithRetry(task Task, retryCount int) {
	for i := 0; i < retryCount; i++ {
		log.Printf("Executing task: %v with retry %d", task.ID, i+1)

		result, err := workerpool.DispatchTaskToWorker(task.ID, task.Command)
		if err != nil {
			// Log the cause of each failed attempt; without this the reason
			// a task ends up "failed" is lost.
			log.Printf("Task %v attempt %d failed: %v", task.ID, i+1, err)
			continue
		}

		log.Printf("Task %v Successfully executed with result: %v", task.ID, result)
		task.Status = "completed"
		task.Result = result
		taskStore.Store(task.ID, task)
		monitor.TrackTaskStatus("completed")
		return
	}

	log.Printf("Task %v failed after %d retries", task.ID, retryCount)
	task.Status = "failed"
	task.Result = "Task failed after retries"
	taskStore.Store(task.ID, task)
	monitor.TrackTaskStatus("failed")
}
// SubmitTask is the HTTP handler that accepts a JSON body of the form
// {"command": "..."}, creates a pending Task with a fresh UUID, queues it for
// execution and responds with the task ID. A body that cannot be bound yields
// 400 with the binding error.
func SubmitTask(c *gin.Context) {
	var req struct {
		Command string `json:"command"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	task := Task{
		ID:      uuid.New().String(),
		Command: req.Command,
		Status:  "pending",
		Result:  "",
	}

	// Record the pending task so GetTask can report it immediately instead
	// of answering "Task not found" until execution finishes. This must
	// happen before the task is queued: storing afterwards could overwrite
	// the final state already written by ExecuteTaskWithRetry.
	taskStore.Store(task.ID, task)
	tasks <- task

	c.JSON(http.StatusOK, gin.H{"message": "Task submitted successfully", "task_id": task.ID})
}
// GetTask is the HTTP handler that looks up the task whose ID is given by the
// "id" path parameter. It responds with the stored task as JSON, or with 404
// and an error message when no such task is stored.
func GetTask(c *gin.Context) {
	stored, found := taskStore.Load(c.Param("id"))
	if found {
		c.JSON(http.StatusOK, stored)
		return
	}
	c.JSON(http.StatusNotFound, gin.H{"error": "Task not found"})
}
go
#main主函数
package main
import (
"log"
"os"
"os/signal"
"syscall"
"taskschedu/master/monitor"
"taskschedu/master/schedutask"
"taskschedu/master/workerpool"
"github.com/gin-gonic/gin"
)
// retryCount is the number of attempts passed to schedutask.ScheduleTask.
var retryCount = 3
// main starts worker discovery, the task scheduler, the monitor and the HTTP
// API on :8080, then waits until either SIGINT/SIGTERM arrives or the HTTP
// server stops with an error.
//
// Worker discovery is stopped explicitly on both exit paths. A deferred stop
// combined with os.Exit in a signal goroutine would never run, because
// os.Exit skips deferred calls.
func main() {
	// Start worker discovery for Consul services whose ID begins with "worker-".
	if err := workerpool.StartWorkerDiscovery("^worker-.*"); err != nil {
		log.Fatalf("启动worker发现服务失败: %v", err)
	}

	// Register the HTTP API routes and start the background services.
	r := gin.Default()
	r.POST("/task", schedutask.SubmitTask)
	r.GET("/task/:id", schedutask.GetTask)
	go schedutask.ScheduleTask(retryCount)
	go monitor.StartMonitor()

	// Subscribe to termination signals before serving so none is missed.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	// Run the HTTP server in the background and report its exit error.
	serveErr := make(chan error, 1)
	go func() {
		log.Println("Master HTTP服务启动在 :8080")
		serveErr <- r.Run(":8080")
	}()

	exitCode := 0
	select {
	case <-sigChan:
		log.Println("正在关闭服务...")
	case err := <-serveErr:
		// Previously the error from r.Run was ignored.
		log.Printf("HTTP服务异常退出: %v", err)
		exitCode = 1
	}

	workerpool.StopWorkerDiscovery()
	os.Exit(exitCode)
}
项目k8s部署的yaml文件
- worker.yaml
yaml
root@master:/usr/local/src# cat worker.yaml
# Deployment running a single worker pod that exposes gRPC on port 50051.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: worker-deployment
  labels:
    app: worker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: worker
  template:
    metadata:
      labels:
        app: worker
    spec:
      containers:
        - name: worker
          image: workerapp:v0.1
          ports:
            - containerPort: 50051
      restartPolicy: Always
---
# ClusterIP Service in front of the worker pods. Its name starts with
# "worker-", which the master's "^worker-.*" discovery pattern matches once
# the watch service has registered it in Consul.
apiVersion: v1
kind: Service
metadata:
  name: worker-service
  labels:
    app: worker
spec:
  type: ClusterIP
  ports:
    - port: 50051
      targetPort: 50051
      protocol: TCP
  selector:
    app: worker
- master.yaml
yaml
root@master:/usr/local/src# cat master.yaml
# Deployment running a single master pod that serves the HTTP API on port 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: master-deployment
  labels:
    app: master
spec:
  replicas: 1
  selector:
    matchLabels:
      app: master
  template:
    metadata:
      labels:
        app: master
    spec:
      containers:
        - name: master
          image: masterapp:v0.3
          ports:
            - containerPort: 8080
      restartPolicy: Always
---
# NodePort Service exposing the master HTTP API outside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: master-service
  labels:
    app: master
spec:
  type: NodePort
  ports:
    - port: 8080
      targetPort: 8080
      protocol: TCP
  selector:
    app: master
运行watch服务后,部署worker,可以实现服务自动发现。接着部署master服务,可以动态更新worker服务列表,并自动连接。