一、安装
1.1.下载
consul和Nomad一样,都是一个独立的二进制文件,下载后解压,并将其拷贝到/usr/local/bin/目录下即可。
从 https://releases.hashicorp.com/consul 下载,目前最新版本为 1.22.0。
```bash
unzip consul_1.22.0_linux_amd64.zip
sudo mv consul /usr/local/bin/
```
1.2.端口规划
grpc_tls设置为-1,表示禁用gRPC TLS。
Server节点:
- Server1: 8500 (HTTP), 8600 (DNS), 8300 (Server RPC), 8301 (Serf LAN), 8302 (Serf WAN)
- Server2: 8501 (HTTP), 8601 (DNS), 8310 (Server RPC), 8311 (Serf LAN), 8312 (Serf WAN)
- Server3: 8502 (HTTP), 8602 (DNS), 8320 (Server RPC), 8321 (Serf LAN), 8322 (Serf WAN)
Client节点:
- Client1: 8503 (HTTP), 8603 (DNS), 8330 (Server RPC), 8331 (Serf LAN), 8332 (Serf WAN)
- Client2: 8504 (HTTP), 8604 (DNS), 8340 (Server RPC), 8341 (Serf LAN), 8342 (Serf WAN)
注意:Server RPC端口和Serf WAN端口只在Server节点上实际使用(Serf WAN用于WAN gossip),在Client节点上通常不需要,但为了统一配置,我们都指定。
1.3.启动集群:
/data/consul/start-cluster.sh
停止所有consul进程:
pkill consul
1.4.验证集群状态:
检查集群成员
consul members -http-addr=http://127.0.0.1:8500
Node Address Status Type Build Protocol DC Partition Segment
server1 127.0.0.1:8301 alive server 1.22.0 2 dc1 default
server2 127.0.0.1:8311 alive server 1.22.0 2 dc1 default
server3 127.0.0.1:8321 alive server 1.22.0 2 dc1 default
client1 127.0.0.1:8331 alive client 1.22.0 2 dc1 default
client2 127.0.0.1:8341 alive client 1.22.0 2 dc1 default
检查Server节点状态
consul operator raft list-peers -http-addr=http://127.0.0.1:8500
检查节点服务
curl http://127.0.0.1:8500/v1/catalog/nodes
检查集群领导状态
curl http://127.0.0.1:8500/v1/status/leader
二、简单的http服务
见simple_app.py
安装依赖(如果使用conda环境,请先执行下面的conda activate激活环境,再执行pip install):
pip install Flask==2.3.3
conda activate qaanthing
指定端口(默认端口为5000):
PORT=5000 python simple_app.py
PORT=5001 python simple_app.py
python
from flask import Flask, jsonify
import time
import socket
import os
app = Flask(__name__)
# Service start time, captured when the module loads; /health uses it to report uptime
start_time = time.time()
# Look up host information
def get_host_info():
    """Return the local hostname and the IP address it resolves to.

    Returns:
        tuple[str, str]: ``(hostname, ip)``. If the lookup fails (for
        example the hostname cannot be resolved), ``('unknown', 'unknown')``
        is returned so callers can still build a response.
    """
    try:
        hostname = socket.gethostname()
        local_ip = socket.gethostbyname(hostname)
    except (OSError, UnicodeError):
        # Only lookup failures are handled here: socket.gaierror/herror are
        # OSError subclasses and UnicodeError covers hostnames that cannot be
        # encoded. A bare ``except`` would also swallow KeyboardInterrupt and
        # SystemExit.
        return 'unknown', 'unknown'
    return hostname, local_ip
# Index page
@app.route('/')
def home():
    """Return a JSON greeting that identifies the service and its host."""
    host_name, host_ip = get_host_info()
    payload = {
        'message': 'Hello from Simple Flask Service!',
        'service': 'simple-flask-service',
        'hostname': host_name,
        'ip': host_ip,
        'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
    }
    return jsonify(payload)
# Health-check endpoint - this is the one Consul polls
@app.route('/health')
def health():
    """Return a JSON health report with host details and uptime (HTTP 200)."""
    host_name, host_ip = get_host_info()
    report = {
        'status': 'healthy',
        'service': 'simple-flask-service',
        'hostname': host_name,
        'ip': host_ip,
        'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
        # Seconds elapsed since the module-level start_time was recorded
        'uptime': round(time.time() - start_time, 2),
    }
    return jsonify(report), 200
# Readiness check
@app.route('/ready')
def ready():
    """Return a JSON readiness marker with the current local time (HTTP 200)."""
    now = time.strftime('%Y-%m-%d %H:%M:%S')
    body = {'status': 'ready', 'timestamp': now}
    return jsonify(body), 200
# Liveness check
@app.route('/live')
def live():
    """Return a JSON liveness marker with the current local time (HTTP 200)."""
    now = time.strftime('%Y-%m-%d %H:%M:%S')
    body = {'status': 'alive', 'timestamp': now}
    return jsonify(body), 200
# Service information
@app.route('/info')
def info():
    """Return service metadata, host details and runtime settings as JSON.

    The Python version is taken from the PYTHON_VERSION environment variable
    when it is set; otherwise the version of the running interpreter is
    reported instead of the placeholder 'unknown'. The environment name comes
    from ENVIRONMENT and defaults to 'development'.
    """
    # Function-scope import so this handler does not depend on a new
    # file-level import.
    import platform

    hostname, ip = get_host_info()
    return jsonify({
        'service': 'simple-flask-service',
        'version': '1.0.0',
        'hostname': hostname,
        'ip': ip,
        'python_version': os.environ.get('PYTHON_VERSION', platform.python_version()),
        'environment': os.environ.get('ENVIRONMENT', 'development')
    })
if __name__ == '__main__':
    # The listening port comes from the PORT environment variable (default 5000)
    port = int(os.getenv('PORT', 5000))
    startup_lines = (
        f"Starting Simple Flask Service on port {port}...",
        f"Health check available at: http://0.0.0.0:{port}/health",
    )
    for startup_line in startup_lines:
        print(startup_line)
    app.run(host='0.0.0.0', port=port, debug=False)
三、注册服务
仅依赖第三方的requests库(不属于Python标准库,需先执行 pip install requests)。
python register_services.py
python
#!/usr/bin/env python3
import requests
import json
import time
import sys
class ConsulServiceRegistry:
    """Small client for the service endpoints of a Consul agent's HTTP API.

    Each method prints its outcome and reports failure through its return
    value rather than raising, so a calling script can keep going.
    """

    def __init__(self, consul_host='127.0.0.1', consul_port=8500, timeout=5):
        """Store the agent's base URL and the per-request timeout.

        Args:
            consul_host: Host name or IP address of the Consul agent.
            consul_port: HTTP API port of the agent.
            timeout: Seconds to wait for each HTTP request. Without a
                timeout, requests would block indefinitely on an
                unresponsive agent.
        """
        self.consul_base_url = f"http://{consul_host}:{consul_port}/v1"
        self.timeout = timeout

    def register_service(self, service_name, service_id, address, port, tags=None, check=None):
        """Register a service with the Consul agent.

        Args:
            service_name: Logical service name (sent as ``Name``).
            service_id: Unique ID of this instance (sent as ``ID``).
            address: Address the service listens on.
            port: Port the service listens on.
            tags: Optional list of tags; an empty list is sent when omitted.
            check: Optional health-check definition, sent as ``Check``.

        Returns:
            bool: True when the agent answers HTTP 200, False on any other
            status code or when the request itself fails.
        """
        service_data = {
            "Name": service_name,
            "ID": service_id,
            "Address": address,
            "Port": port,
            "Tags": tags or []
        }
        # Attach the health-check definition only when one was supplied
        if check:
            service_data["Check"] = check
        url = f"{self.consul_base_url}/agent/service/register"
        try:
            response = requests.put(url, json=service_data, timeout=self.timeout)
        except Exception as e:
            # Deliberate best-effort boundary: report the problem and let the
            # caller decide what to do with the False result.
            print(f"❌ 注册服务时出错: {service_name}, 错误: {e}")
            return False
        if response.status_code == 200:
            print(f"✅ 服务注册成功: {service_name} (ID: {service_id})")
            return True
        print(f"❌ 服务注册失败: {service_name}, 状态码: {response.status_code}")
        return False

    def deregister_service(self, service_id):
        """Deregister a service instance from the Consul agent.

        Args:
            service_id: ID of the instance to remove.

        Returns:
            bool: True when the agent answers HTTP 200, False otherwise.
        """
        url = f"{self.consul_base_url}/agent/service/deregister/{service_id}"
        try:
            response = requests.put(url, timeout=self.timeout)
        except Exception as e:
            print(f"❌ 注销服务时出错: {service_id}, 错误: {e}")
            return False
        if response.status_code == 200:
            print(f"✅ 服务注销成功: {service_id}")
            return True
        print(f"❌ 服务注销失败: {service_id}, 状态码: {response.status_code}")
        return False

    def list_services(self):
        """Print and return the services registered on the agent.

        Returns:
            dict | None: The decoded JSON body of ``/agent/services`` (keyed
            by service ID) on HTTP 200, or None on any failure.
        """
        url = f"{self.consul_base_url}/agent/services"
        try:
            response = requests.get(url, timeout=self.timeout)
            if response.status_code != 200:
                print(f"❌ 获取服务列表失败, 状态码: {response.status_code}")
                return None
            services = response.json()
            print(f"📋 当前注册的服务 ({len(services)} 个):")
            for service_id, service_info in services.items():
                print(f" - {service_info['Service']} ({service_id}): {service_info['Address']}:{service_info['Port']}")
            return services
        except Exception as e:
            # Covers transport errors as well as an unexpected response body
            print(f"❌ 获取服务列表时出错: {e}")
            return None
def _build_flask_app_service(port):
    """Return the registration settings for the flask-app instance on *port*."""
    return {
        "name": "flask-app",
        "id": f"flask-app-{port}",
        "address": "127.0.0.1",
        "port": port,
        "tags": ["web", "python", "flask", "v1"],
        "check": {
            "name": "HTTP API Health Check",
            "http": f"http://127.0.0.1:{port}/health",
            "method": "GET",
            "interval": "10s",
            "timeout": "5s"
        }
    }


def main():
    """Register the two local flask-app instances with Consul and report.

    Prints the agent's service list before and after registration, the
    number of successful registrations and, when all succeeded, the health
    check URL of every instance.
    """
    # Create the Consul registry client
    consul = ConsulServiceRegistry('127.0.0.1', 8500)
    # The two instances differ only by port, so build them from one template
    services_to_register = [_build_flask_app_service(port) for port in (5000, 5001)]
    print("🚀 开始注册服务到Consul...")
    # List the current services first
    print("\n--- 注册前的服务列表 ---")
    consul.list_services()
    # Register the services
    print("\n--- 注册服务 ---")
    success_count = 0
    for service in services_to_register:
        if consul.register_service(
            service["name"],
            service["id"],
            service["address"],
            service["port"],
            service["tags"],
            service["check"]
        ):
            success_count += 1
    # Wait a moment so the registrations take effect
    time.sleep(2)
    # Show the service list after registration
    print("\n--- 注册后的服务列表 ---")
    consul.list_services()
    print(f"\n📊 注册结果: {success_count}/{len(services_to_register)} 个服务注册成功")
    if success_count == len(services_to_register):
        print("🎉 所有服务注册成功!")
        # Show the health-check endpoints
        print("\n🏥 健康检查端点:")
        for service in services_to_register:
            print(f" - {service['id']}: {service['check']['http']}")
        print("\n🔍 你可以在Consul UI中查看服务状态: http://127.0.0.1:8500")
    else:
        print("⚠️ 部分服务注册失败,请检查Consul状态和服务配置")
# Script entry point: run the registration flow only when executed directly
if __name__ == "__main__":
    main()
四、验证服务
检查Consul中所有服务的状态
仅依赖第三方的requests库(不属于Python标准库,需先执行 pip install requests)。
python consul_check.py
python
#!/usr/bin/env python3
import requests
import json
from datetime import datetime
def check_consul_services(consul_host="127.0.0.1", consul_port=8500):
    """Print the health status of every service registered on a Consul agent.

    The service list is read from ``/agent/services``. For each instance,
    only the checks whose ``ServiceID`` matches that instance are counted,
    and the instance is reported healthy when it has at least one such check
    and all of them are passing.

    Args:
        consul_host: Host name or IP address of the Consul agent.
        consul_port: HTTP API port of the agent.

    Returns:
        None. All results, including connection problems, are printed.
    """
    base_url = f"http://{consul_host}:{consul_port}/v1"
    print(f"🔍 检查Consul服务状态 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Consul地址: {consul_host}:{consul_port}")
    print("-" * 50)
    try:
        # Fetch every registered service
        services_url = f"{base_url}/agent/services"
        services_response = requests.get(services_url, timeout=5)
        if services_response.status_code != 200:
            print(f"❌ 无法获取服务列表: HTTP {services_response.status_code}")
            return
        services_data = services_response.json()
        if not services_data:
            print("ℹ️ Consul中没有注册的服务")
            return
        print(f"发现 {len(services_data)} 个注册的服务:")
        # /health/checks/<name> returns the checks of every instance that
        # shares the name, so query once per name and reuse the answer.
        # None marks a name whose checks could not be fetched.
        checks_by_name = {}
        for service_id, service_info in services_data.items():
            service_name = service_info['Service']
            address = service_info['Address']
            port = service_info['Port']
            if service_name not in checks_by_name:
                health_url = f"{base_url}/health/checks/{service_name}"
                health_response = requests.get(health_url, timeout=5)
                if health_response.status_code == 200:
                    checks_by_name[service_name] = health_response.json()
                else:
                    checks_by_name[service_name] = None
            all_checks = checks_by_name[service_name]
            if all_checks is None:
                print(f" ⚠️ 未知 {service_name} - 无法获取健康状态")
                print()
                continue
            # Keep only this instance's checks; otherwise one passing sibling
            # would make every instance of the service look healthy.
            instance_checks = [c for c in all_checks if c.get('ServiceID') == service_id]
            passing_checks = [c for c in instance_checks if c.get('Status') == 'passing']
            is_healthy = bool(instance_checks) and len(passing_checks) == len(instance_checks)
            status = "✅ 健康" if is_healthy else "❌ 不健康"
            check_count = f"{len(passing_checks)}/{len(instance_checks)}"
            print(f" {status} {service_name} ({service_id})")
            print(f" 地址: {address}:{port}")
            print(f" 健康检查: {check_count}")
            # Show the tags, if any
            tags = service_info.get('Tags', [])
            if tags:
                print(f" 标签: {', '.join(tags)}")
            print()
    except requests.exceptions.ConnectionError:
        print("❌ 无法连接到Consul,请确保Consul正在运行")
    except requests.exceptions.Timeout:
        print("❌ 连接Consul超时")
    except Exception as e:
        print(f"❌ 检查服务状态时出错: {e}")
def check_specific_service(service_name, consul_host="127.0.0.1", consul_port=8500):
    """Print the per-instance health of one service.

    The data comes from ``/health/service/<name>``, whose entries carry both
    the ``Service`` record (ID, address, port) and its ``Checks``. The
    ``/health/checks/<name>`` endpoint returns checks only, which is why the
    address and port could never be filled in from it.

    Args:
        service_name: Name of the service to inspect.
        consul_host: Host name or IP address of the Consul agent.
        consul_port: HTTP API port of the agent.

    Returns:
        None. All results, including errors, are printed.
    """
    base_url = f"http://{consul_host}:{consul_port}/v1"
    print(f"🔍 检查服务 '{service_name}' 的状态")
    print("-" * 50)
    try:
        # Fetch the instances of the service together with their checks
        health_url = f"{base_url}/health/service/{service_name}"
        health_response = requests.get(health_url, timeout=5)
        if health_response.status_code != 200:
            print(f"❌ 无法获取服务 '{service_name}' 的状态: HTTP {health_response.status_code}")
            return
        health_data = health_response.json()
        if not health_data:
            print(f"ℹ️ 服务 '{service_name}' 没有健康检查或不存在")
            return
        # Group by service instance
        instances = {}
        for entry in health_data:
            service = entry.get('Service') or {}
            node = entry.get('Node') or {}
            service_id = service.get('ID', 'unknown')
            if service_id not in instances:
                instances[service_id] = {
                    'name': service.get('Service', 'unknown'),
                    # An empty service address falls back to the node address
                    'address': service.get('Address') or node.get('Address') or 'unknown',
                    'port': service.get('Port', 'unknown'),
                    'checks': []
                }
            for check in entry.get('Checks') or []:
                # Skip checks not bound to this instance (for example
                # node-level checks) so the counts cover only the service's
                # own checks.
                if check.get('ServiceID') != service_id:
                    continue
                instances[service_id]['checks'].append({
                    'name': check.get('Name', 'unknown'),
                    'status': check.get('Status', 'unknown'),
                    'output': check.get('Output', '')
                })
        print(f"服务 '{service_name}' 有 {len(instances)} 个实例:")
        for instance_id, instance_info in instances.items():
            passing_checks = [c for c in instance_info['checks'] if c['status'] == 'passing']
            status = "✅ 健康" if len(passing_checks) == len(instance_info['checks']) else "❌ 不健康"
            print(f" {status} {instance_id}")
            print(f" 地址: {instance_info['address']}:{instance_info['port']}")
            print(f" 检查结果: {len(passing_checks)}/{len(instance_info['checks'])} 通过")
            # List every check; add the output for the ones that are failing
            for check in instance_info['checks']:
                icon = "✅" if check['status'] == 'passing' else "❌"
                print(f" {icon} {check['name']}: {check['status']}")
                if check['status'] != 'passing' and check['output']:
                    print(f" 详情: {check['output']}")
            print()
    except Exception as e:
        print(f"❌ 检查服务 '{service_name}' 时出错: {e}")
def get_healthy_instances(service_name, consul_host="127.0.0.1", consul_port=8500):
    """Return the instances of a service that Consul reports as passing.

    Args:
        service_name: Name of the service to look up.
        consul_host: Host name or IP address of the Consul agent.
        consul_port: HTTP API port of the agent.

    Returns:
        list[dict]: One dict per healthy instance with the keys ``id``,
        ``name``, ``address``, ``port`` and ``tags``. An empty list is
        returned on a non-200 response or on any error.
    """
    base_url = f"http://{consul_host}:{consul_port}/v1"
    try:
        # The health endpoint filters to passing instances server-side
        health_url = f"{base_url}/health/service/{service_name}?passing=true"
        response = requests.get(health_url, timeout=5)
        if response.status_code != 200:
            print(f"❌ 无法获取健康实例: HTTP {response.status_code}")
            return []
        healthy_instances = []
        for instance in response.json():
            service_info = instance['Service']
            node_info = instance.get('Node') or {}
            healthy_instances.append({
                'id': service_info['ID'],
                'name': service_info['Service'],
                # A service registered without its own address has an empty
                # Address; fall back to the node address so callers still get
                # something they can connect to.
                'address': service_info['Address'] or node_info.get('Address', ''),
                'port': service_info['Port'],
                'tags': service_info.get('Tags', [])
            })
        return healthy_instances
    except Exception as e:
        print(f"❌ 获取健康实例时出错: {e}")
        return []
if __name__ == "__main__":
    # Overview of every registered service
    check_consul_services()
    separator = "=" * 50
    print("\n" + separator)
    print("详细服务检查")
    print(separator)
    # Detailed report for one service
    check_specific_service("flask-app")
    # Instances that currently pass their health checks
    healthy_instances = get_healthy_instances("flask-app")
    if not healthy_instances:
        print("ℹ️ 没有发现健康的 'flask-app' 实例")
    else:
        print(f"🎯 发现 {len(healthy_instances)} 个健康的 'flask-app' 实例:")
        for instance in healthy_instances:
            print(f" - {instance['id']}: {instance['address']}:{instance['port']}")
会有类似如下输出:
服务 'flask-app' 有 2 个实例:
✅ 健康 flask-app-5000
地址: unknown:unknown
检查结果: 1/1 通过
✅ HTTP API Health Check: passing
❌ 不健康 flask-app-5001
地址: unknown:unknown
检查结果: 0/1 通过
❌ HTTP API Health Check: critical
详情: Get "http://127.0.0.1:5001/health": dial tcp 127.0.0.1:5001: connect: connection refused