Django博客项目集成Celery实现定时心跳监控系统
项目背景
在Django博客项目中,我们需要实现一个网络监控系统,能够定时检测网络连通性(如ping baidu.com、google.com等目标),并提供Web界面和API接口来查看监控结果。这个需求催生了本次技术实践:集成Celery异步任务队列,实现定时心跳检测功能。
技术架构
核心组件
- Django: Web框架,提供管理界面和API
- Celery: 异步任务队列,处理心跳检测任务
- Redis: 消息代理和缓存后端
- django-celery-beat: 定时任务调度器
- django-celery-results: 任务结果存储
系统架构图
markdown
┌─────────────────┐ ┌──────────────┐ ┌─────────────────┐
│ Django Web │ │ Redis │ │ Celery Worker │
│ │ │ │ │ │
│ ┌─────────────┐ │ │ ┌──────────┐ │ │ ┌─────────────┐ │
│ │ 管理界面 │ │◄──►│ │ 队列DB0 │ │◄──►│ │ 心跳检测 │ │
│ │ API接口 │ │ │ │ 结果DB1 │ │ │ │ HTTP检测 │ │
│ │ 网络监控 │ │ │ │ 缓存DB2 │ │ │ │ 健康汇总 │ │
│ └─────────────┘ │ │ └──────────┘ │ │ └─────────────┘ │
└─────────────────┘ └──────────────┘ └─────────────────┘
▲ │
│ ┌──────────────┐ │
└────────────│ Celery Beat │◄─────────────┘
│ 定时调度器 │
└──────────────┘
实现步骤
1. 依赖安装与配置
首先在requirements.txt
中添加必要的依赖:
txt
# Celery相关
celery==5.3.4
django-celery-beat==2.5.0
django-celery-results==2.5.1
redis==5.0.1
eventlet==0.33.3 # Windows下的异步支持
2. Celery配置
创建djangoblog/celery.py
:
python
"""
Celery configuration for djangoblog project.
"""
import os
from celery import Celery
from django.conf import settings
# 设置Django默认设置模块
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'djangoblog.settings')
app = Celery('djangoblog')
# 使用Django设置文件配置Celery
app.config_from_object('django.conf:settings', namespace='CELERY')
# 自动发现任务
app.autodiscover_tasks()
# Celery配置
app.conf.update(
# 任务序列化
task_serializer='json',
accept_content=['json'],
result_serializer='json',
timezone=settings.TIME_ZONE,
enable_utc=True,
# 任务路由
task_routes={
'blog.tasks.send_email_task': {'queue': 'email'},
'blog.tasks.update_search_index_task': {'queue': 'search'},
'blog.tasks.notify_search_engines_task': {'queue': 'seo'},
'comments.tasks.send_comment_notification_task': {'queue': 'email'},
'servermanager.tasks.heartbeat_ping_task': {'queue': 'monitoring'},
'servermanager.tasks.heartbeat_http_task': {'queue': 'monitoring'},
'servermanager.tasks.network_connectivity_monitor': {'queue': 'monitoring'},
},
# 任务重试配置
task_acks_late=True,
worker_prefetch_multiplier=1,
# 结果后端
result_backend=f'redis://{os.environ.get("DJANGO_REDIS_URL", "127.0.0.1:6379")}/1',
# 代理设置
broker_url=f'redis://{os.environ.get("DJANGO_REDIS_URL", "127.0.0.1:6379")}/0',
# 任务过期时间
result_expires=3600,
# 任务限流
task_annotations={
'blog.tasks.send_email_task': {'rate_limit': '10/m'},
'blog.tasks.notify_search_engines_task': {'rate_limit': '5/m'},
'servermanager.tasks.heartbeat_ping_task': {'rate_limit': '60/m'},
'servermanager.tasks.heartbeat_http_task': {'rate_limit': '30/m'},
}
)
@app.task(bind=True)
def debug_task(self):
print(f'Request: {self.request!r}')
3. Django设置更新
在djangoblog/settings.py
中添加Celery相关配置:
python
# Celery configuration (broker on Redis DB 0, results on DB 1).
CELERY_BROKER_URL = f'redis://{os.environ.get("DJANGO_REDIS_URL", "127.0.0.1:6379")}/0'
CELERY_RESULT_BACKEND = f'redis://{os.environ.get("DJANGO_REDIS_URL", "127.0.0.1:6379")}/1'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = TIME_ZONE

# Register the Celery helper apps (beat scheduler + result storage).
INSTALLED_APPS = [
    # ...existing apps...
    'django_celery_beat',
    'django_celery_results',
]

# Use Redis (DB 2) as the Django cache so results written by the Celery
# worker process are visible to the web process; the default local-memory
# cache is per-process and would not be shared.
if os.environ.get("DJANGO_REDIS_URL"):
    CACHES = {
        'default': {
            'BACKEND': 'django.core.cache.backends.redis.RedisCache',
            'LOCATION': f'redis://{os.environ.get("DJANGO_REDIS_URL")}/2',
        }
    }
4. 心跳检测任务实现
创建servermanager/tasks.py
:
python
"""
网络监控相关的异步任务
"""
import logging
import platform
import subprocess
import time
from datetime import datetime, timedelta

import requests
from celery import shared_task
from django.core.cache import cache
from django.utils import timezone
logger = logging.getLogger(__name__)
@shared_task(bind=True)
def heartbeat_ping_task(self, target_host='baidu.com', timeout=5):
    """
    Heartbeat task: ping a host once to check network connectivity.

    Args:
        target_host: hostname or IP address to ping
        timeout: per-ping timeout in seconds

    Returns:
        dict: ping outcome (timestamp, success flag, latency, raw output);
        also cached for the dashboard (latest result + last-10 history).
    """
    try:
        # Build a platform-specific single-packet ping command.
        system = platform.system().lower()
        if system == 'windows':
            # Windows: ping -n 1 -w <timeout in ms> host
            cmd = ['ping', '-n', '1', '-w', str(timeout * 1000), target_host]
        else:
            # Linux/Mac: ping -c 1 -W <timeout in s> host
            cmd = ['ping', '-c', '1', '-W', str(timeout), target_host]

        # FIX: use the monotonic perf_counter for elapsed time instead of
        # datetime.now(); wall-clock time can jump (NTP sync, DST) and skew
        # the measurement. Note the value still includes process-spawn
        # overhead, so it slightly overstates the pure network RTT.
        start = time.perf_counter()
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout + 2  # grace period beyond ping's own timeout
        )
        response_time = (time.perf_counter() - start) * 1000

        # A zero exit code means the host replied.
        is_success = result.returncode == 0

        ping_result = {
            'timestamp': timezone.now().isoformat(),
            'target_host': target_host,
            'is_success': is_success,
            'response_time_ms': round(response_time, 2),
            'return_code': result.returncode,
            'stdout': result.stdout,
            'stderr': result.stderr
        }

        if is_success:
            logger.info(f"心跳检测成功 - {target_host}, 响应时间: {response_time:.2f}ms")
        else:
            logger.warning(f"心跳检测失败 - {target_host}, 错误: {result.stderr}")

        # Latest result, shared with the web process via the Redis cache.
        cache_key = f'heartbeat_ping_{target_host}'
        cache.set(cache_key, ping_result, timeout=300)  # keep for 5 minutes

        # Rolling history of the last 10 checks.
        history_key = f'heartbeat_ping_history_{target_host}'
        history = cache.get(history_key, [])
        history.append(ping_result)
        if len(history) > 10:
            history = history[-10:]
        cache.set(history_key, history, timeout=3600)  # keep for 1 hour

        return ping_result
    except subprocess.TimeoutExpired:
        # The subprocess itself hung past timeout + 2 seconds.
        error_result = {
            'timestamp': timezone.now().isoformat(),
            'target_host': target_host,
            'is_success': False,
            'response_time_ms': timeout * 1000,
            'error': 'ping timeout'
        }
        logger.error(f"心跳检测超时 - {target_host}")
        return error_result
    except Exception as exc:
        # Unexpected failure (e.g. ping binary missing); report, don't raise,
        # so beat keeps scheduling subsequent checks.
        error_result = {
            'timestamp': timezone.now().isoformat(),
            'target_host': target_host,
            'is_success': False,
            'error': str(exc)
        }
        logger.error(f"心跳检测异常 - {target_host}: {exc}")
        return error_result
@shared_task(bind=True)
def heartbeat_http_task(self, url, timeout=10):
    """HTTP reachability check: GET the URL, record status code and latency.

    Returns a dict (timestamp, url, is_success, status_code/error,
    response_time_ms); the result is also cached for 5 minutes.
    """
    try:
        # FIX: measure latency with the monotonic perf_counter instead of
        # datetime.now(), which can be skewed by wall-clock adjustments.
        start = time.perf_counter()
        response = requests.get(url, timeout=timeout)
        response_time = (time.perf_counter() - start) * 1000

        # Any final status below 400 counts as reachable (requests follows
        # redirects by default, so 3xx normally resolves to the final page).
        is_success = response.status_code < 400

        result = {
            'timestamp': timezone.now().isoformat(),
            'url': url,
            'is_success': is_success,
            'status_code': response.status_code,
            'response_time_ms': round(response_time, 2)
        }

        # Cache under a key derived from the URL (made cache-key safe).
        cache_key = f'heartbeat_http_{url.replace("://", "_").replace("/", "_")}'
        cache.set(cache_key, result, timeout=300)

        return result
    except Exception as exc:
        # Connection errors, timeouts, DNS failures, etc.
        error_result = {
            'timestamp': timezone.now().isoformat(),
            'url': url,
            'is_success': False,
            'error': str(exc)
        }
        logger.error(f"HTTP检测失败 - {url}: {exc}")
        return error_result
@shared_task(bind=True)
def network_health_summary(self):
    """Aggregate the latest per-target ping results into one health report.

    Reads the cached result of each monitored host and classifies the
    overall status as healthy / warning / critical; the report is cached
    under 'network_health_summary' for one hour and returned.
    """
    monitored_hosts = ['baidu.com', 'google.com', '8.8.8.8']
    report = {
        'timestamp': timezone.now().isoformat(),
        'targets': {},
        'overall_status': 'healthy'
    }

    failures = 0
    for host in monitored_hosts:
        cached = cache.get(f'heartbeat_ping_{host}', {})
        if not cached:
            # No recent check result exists for this host at all.
            report['targets'][host] = {
                'is_success': False,
                'error': 'No data'
            }
            failures += 1
            continue

        host_ok = cached.get('is_success', False)
        report['targets'][host] = {
            'is_success': host_ok,
            'response_time_ms': cached.get('response_time_ms'),
            'last_check': cached.get('timestamp')
        }
        if not host_ok:
            failures += 1

    # healthy: all hosts up; warning: partial outage; critical: all down.
    if failures == 0:
        report['overall_status'] = 'healthy'
    elif failures < len(monitored_hosts):
        report['overall_status'] = 'warning'
    else:
        report['overall_status'] = 'critical'

    cache.set('network_health_summary', report, timeout=3600)
    return report
5. Web界面和API实现
创建servermanager/views.py
:
python
from django.views.generic import TemplateView
from django.views import View
from django.http import JsonResponse
from django.contrib.admin.views.decorators import staff_member_required
from django.utils.decorators import method_decorator
from django.core.cache import cache
from .tasks import heartbeat_ping_task, heartbeat_http_task
class NetworkMonitorView(TemplateView):
    """Network monitoring dashboard, restricted to staff users."""
    template_name = 'servermanager/network_monitor.html'

    @method_decorator(staff_member_required)
    def dispatch(self, *args, **kwargs):
        return super().dispatch(*args, **kwargs)

    def get_context_data(self, **kwargs):
        """Assemble the cached health summary, per-target status and debug info."""
        context = super().get_context_data(**kwargs)

        # Overall report produced by the network_health_summary task.
        summary = cache.get('network_health_summary', {})
        context['network_summary'] = summary

        # Latest result plus a short history for each monitored target.
        targets = ['baidu.com', 'google.com', '8.8.8.8']
        target_status = {}
        for target in targets:
            cache_key = f'heartbeat_ping_{target}'
            latest = cache.get(cache_key, {})
            history_key = f'heartbeat_ping_history_{target}'
            history = cache.get(history_key, [])
            target_status[target] = {
                'latest': latest,
                'history': history[-5:] if history else []  # last 5 checks
            }
        context['target_status'] = target_status
        context['targets'] = targets

        # Debug panel data (cache backend, Redis key count, worker liveness).
        from django.conf import settings
        context['debug_info'] = {
            'cache_backend': settings.CACHES['default']['BACKEND'],
            'cache_location': settings.CACHES['default'].get('LOCATION', 'N/A'),
            'redis_keys_count': self._get_redis_keys_count(),
            'worker_status': self._check_worker_status()
        }
        return context

    def _get_redis_keys_count(self):
        """Count heartbeat-related keys in the Redis cache DB (best effort)."""
        try:
            import redis
            r = redis.Redis(host='127.0.0.1', port=6379, db=2)
            return len(r.keys('*heartbeat*'))
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow to Exception for the same best-effort
        # behavior without masking interpreter-level signals.
        except Exception:
            return 0

    def _check_worker_status(self):
        """Report whether any Celery worker responds to inspect() (best effort)."""
        try:
            from celery import current_app
            inspect = current_app.control.inspect()
            active = inspect.active()
            return 'Active' if active else 'No active workers'
        # FIX: bare `except:` narrowed to Exception (see above).
        except Exception:
            return 'Unknown'
class NetworkStatusAPIView(View):
    """JSON API exposing network check results and manual check triggering."""

    @method_decorator(staff_member_required)
    def dispatch(self, *args, **kwargs):
        return super().dispatch(*args, **kwargs)

    def get(self, request):
        """Return the latest result, history and overall summary for one target."""
        target = request.GET.get('target', 'baidu.com')

        latest_result = cache.get(f'heartbeat_ping_{target}', {})
        check_history = cache.get(f'heartbeat_ping_history_{target}', [])
        health_summary = cache.get('network_health_summary', {})

        return JsonResponse({
            'target': target,
            'latest': latest_result,
            'history': check_history,
            'summary': health_summary
        })

    def post(self, request):
        """Kick off a ping or HTTP check as an asynchronous Celery task."""
        target = request.POST.get('target', 'baidu.com')
        check_type = request.POST.get('type', 'ping')  # 'ping' or 'http'

        # Reject anything other than the two supported check types up front.
        if check_type not in ('ping', 'http'):
            return JsonResponse({
                'status': 'error',
                'message': '不支持的检测类型'
            }, status=400)

        try:
            if check_type == 'ping':
                task = heartbeat_ping_task.delay(target)
            else:
                url = request.POST.get('url', f'https://www.{target}')
                task = heartbeat_http_task.delay(url)

            return JsonResponse({
                'status': 'success',
                'message': f'{check_type.upper()}任务已提交',
                'task_id': task.task_id,
                'target': target,
                'check_type': check_type
            })
        except Exception as e:
            return JsonResponse({
                'status': 'error',
                'message': f'任务提交失败: {str(e)}'
            }, status=500)
class TaskStatusAPIView(View):
    """API for querying the state of a previously submitted Celery task."""

    @method_decorator(staff_member_required)
    def dispatch(self, *args, **kwargs):
        return super().dispatch(*args, **kwargs)

    def get(self, request):
        """Return status (and the result, once finished) for a given task id."""
        task_id = request.GET.get('task_id')
        if not task_id:
            return JsonResponse({'error': '缺少task_id参数'}, status=400)

        try:
            from celery.result import AsyncResult
            outcome = AsyncResult(task_id)
            finished = outcome.ready()

            payload = {
                'task_id': task_id,
                'status': outcome.status,
                'ready': finished,
                # successful() is only meaningful once the task has settled.
                'successful': outcome.successful() if finished else None,
            }
            if finished:
                if outcome.successful():
                    payload['result'] = outcome.result
                else:
                    # On failure, info holds the exception raised by the task.
                    payload['error'] = str(outcome.info)

            return JsonResponse(payload)
        except Exception as e:
            return JsonResponse({
                'error': f'查询任务状态失败: {str(e)}'
            }, status=500)
6. 定时任务管理命令
创建servermanager/management/commands/setup_network_monitoring.py
:
python
from django.core.management.base import BaseCommand
from django_celery_beat.models import PeriodicTask, IntervalSchedule
import json
class Command(BaseCommand):
    """Install (default) or remove (--delete) the periodic network-monitoring
    tasks in django-celery-beat."""

    help = '设置网络监控定时任务'

    def add_arguments(self, parser):
        parser.add_argument(
            '--delete',
            action='store_true',
            help='删除现有的网络监控任务',
        )

    def handle(self, *args, **options):
        # --delete switches the command into cleanup mode.
        if options['delete']:
            self.delete_tasks()
        else:
            self.create_tasks()

    def create_tasks(self):
        """Register per-minute ping tasks and a five-minute summary task."""
        minute_schedule, _ = IntervalSchedule.objects.get_or_create(
            every=1,
            period=IntervalSchedule.MINUTES,
        )

        # (display name, task kwargs) for each per-minute ping check.
        ping_specs = [
            ('网络心跳检测-DNS', {'target_host': '8.8.8.8'}),
            ('网络心跳检测-百度', {'target_host': 'baidu.com'}),
            ('网络心跳检测-谷歌', {'target_host': 'google.com'}),
        ]
        for task_name, task_kwargs in ping_specs:
            _, was_created = PeriodicTask.objects.get_or_create(
                name=task_name,
                defaults={
                    'interval': minute_schedule,
                    'task': 'servermanager.tasks.heartbeat_ping_task',
                    'kwargs': json.dumps(task_kwargs),
                }
            )
            if was_created:
                self.stdout.write(
                    self.style.SUCCESS(f'创建定时任务: {task_name}')
                )
            else:
                self.stdout.write(
                    self.style.WARNING(f'任务已存在: {task_name}')
                )

        # Health summary runs on a coarser five-minute schedule.
        five_minute_schedule, _ = IntervalSchedule.objects.get_or_create(
            every=5,
            period=IntervalSchedule.MINUTES,
        )
        _, was_created = PeriodicTask.objects.get_or_create(
            name='网络健康汇总',
            defaults={
                'interval': five_minute_schedule,
                'task': 'servermanager.tasks.network_health_summary',
            }
        )
        if was_created:
            self.stdout.write(
                self.style.SUCCESS('创建健康汇总任务')
            )

    def delete_tasks(self):
        """Remove every monitoring task that create_tasks() may have installed."""
        names_to_remove = (
            '网络心跳检测-DNS',
            '网络心跳检测-百度',
            '网络心跳检测-谷歌',
            '网络健康汇总',
        )
        for task_name in names_to_remove:
            try:
                PeriodicTask.objects.get(name=task_name).delete()
                self.stdout.write(
                    self.style.SUCCESS(f'删除任务: {task_name}')
                )
            except PeriodicTask.DoesNotExist:
                self.stdout.write(
                    self.style.WARNING(f'任务不存在: {task_name}')
                )
调试过程中遇到的问题及解决方案
问题1:Windows下Celery Worker权限异常
现象:
ini
PermissionError: [WinError 5] 拒绝访问
原因:Celery 默认的 prefork 进程池依赖 Unix 的 fork() 机制,在 Windows 上不受支持,billiard 以子进程方式模拟创建 worker 进程时会触发权限错误
解决方案:
- 使用threads pool替代prefork:
bash
celery -A djangoblog worker -l info --pool=threads --concurrency=4
- 或使用eventlet pool:
bash
pip install eventlet
celery -A djangoblog worker -l info --pool=eventlet --concurrency=10
问题2:任务提交但Worker未执行
现象:Beat发送任务,但Worker没有任务执行日志
调试方法:
- 检查任务路由配置:
python
# Inspect the queue routing configuration (excerpt from the Celery settings):
task_routes={
    'servermanager.tasks.heartbeat_ping_task': {'queue': 'monitoring'},
}
- 确保Worker监听正确的队列:
bash
celery -A djangoblog worker -Q celery,email,search,seo,monitoring
- 检查Redis队列内容:
python
# Inspect the broker (Redis DB 0) directly: each Celery queue is a Redis
# list keyed by the queue name, so LLEN shows how many tasks are pending.
import redis
r = redis.Redis(host='127.0.0.1', port=6379, db=0)
print('队列长度:', r.llen('monitoring'))
问题3:缓存数据不共享
现象:任务执行成功但Web界面看不到数据
原因:Django缓存使用本地内存,Worker和Web进程不共享
解决方案:
- 设置环境变量使用Redis缓存:
bash
$env:DJANGO_REDIS_URL="127.0.0.1:6379"
- 修改Django设置:
python
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.redis.RedisCache',
'LOCATION': f'redis://{os.environ.get("DJANGO_REDIS_URL")}/2',
}
}
问题4:任务状态一直PENDING
现象:任务提交后状态一直是PENDING
调试步骤:
- 检查任务是否真的提交到队列:
python
# Dump every key in the broker DB to confirm the task message actually arrived.
import redis
r = redis.Redis(host='127.0.0.1', port=6379, db=0)
keys = r.keys('*')
print(f'Redis keys: {keys}')
- 检查Worker是否接收到任务:
bash
# 启动Worker时使用debug级别日志
celery -A djangoblog worker -l debug
- 手动测试任务提交:
python
# Submit a task by hand and print its id/state to verify the full round trip.
from servermanager.tasks import heartbeat_ping_task
result = heartbeat_ping_task.delay('baidu.com')
print(f'Task ID: {result.task_id}')
print(f'Status: {result.status}')
问题5:重复节点名警告
现象:
sql
DuplicateNodenameWarning: Received multiple replies from node name: celery@DESKTOP-XXX
解决方案: 为Worker指定唯一节点名:
bash
celery -A djangoblog worker -n worker1@%h
调试工具和方法
1. 缓存调试脚本
创建debug_cache.py
:
python
#!/usr/bin/env python
"""Cache-debugging helper: print the Django cache configuration, perform a
round-trip read/write, and inspect Redis DBs 0-3 directly for heartbeat keys.

Run from the project root with a reachable Redis instance.
"""
import os
import django

# django.setup() must run before importing anything that reads settings.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'djangoblog.settings')
django.setup()

from django.core.cache import cache
from django.conf import settings
import redis

print("=== Django缓存配置 ===")
print(f"CACHES: {settings.CACHES}")
print(f"环境变量DJANGO_REDIS_URL: {os.environ.get('DJANGO_REDIS_URL', 'NOT SET')}")

print("\n=== 测试Django缓存读写 ===")
cache.set('test_key', 'test_value', timeout=60)
result = cache.get('test_key')
print(f"写入测试: {result}")

print("\n=== 直接检查Redis ===")
# Scan DBs 0-3: per this setup, 0 = broker queue, 1 = results, 2 = Django cache.
for db in range(4):
    try:
        r = redis.Redis(host='127.0.0.1', port=6379, db=db)
        all_keys = r.keys('*')
        heartbeat_keys = r.keys('*heartbeat*')
        print(f"DB {db}: {len(all_keys)} total keys, {len(heartbeat_keys)} heartbeat keys")
    except Exception as e:
        print(f"DB {db}: Error - {e}")

print("\n=== 检查心跳缓存数据 ===")
targets = ['baidu.com', 'google.com', '8.8.8.8']
for target in targets:
    cache_key = f'heartbeat_ping_{target}'
    data = cache.get(cache_key, {})
    print(f"{target}: {'有数据' if data else '无数据'}")
    if data:
        print(f" 成功: {data.get('is_success')}")
        print(f" 时间: {data.get('timestamp', 'N/A')[:19]}")
2. 任务状态监控脚本
创建monitor_tasks.py
:
python
#!/usr/bin/env python
"""Celery monitoring helper: check worker state and submit a test ping task."""
import os
import django

# Configure Django before importing anything that touches settings.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'djangoblog.settings')
django.setup()

from celery import current_app
from celery.result import AsyncResult
import time
def check_worker_status():
    """Print a snapshot of Celery worker activity: which workers are up,
    how many tasks each has registered, and how many are in flight."""
    try:
        inspector = current_app.control.inspect()

        # Workers that replied to the broadcast are considered alive.
        active_tasks = inspector.active()
        worker_names = list(active_tasks.keys()) if active_tasks else '无'
        print(f"活跃Workers: {worker_names}")

        # Registered task counts per worker node.
        registered_tasks = inspector.registered()
        for node, task_list in (registered_tasks or {}).items():
            print(f"Worker {node} 注册的任务数: {len(task_list)}")

        # Currently executing task counts per worker node.
        for node, task_list in (active_tasks or {}).items():
            print(f"Worker {node} 正在执行的任务数: {len(task_list)}")
    except Exception as e:
        print(f"检查Worker状态失败: {e}")
def test_task_submission():
    """Submit one ping task and poll its state once per second for up to 10s."""
    from servermanager.tasks import heartbeat_ping_task
    print("提交测试任务...")
    result = heartbeat_ping_task.delay('baidu.com')
    print(f"任务ID: {result.task_id}")
    # Poll the task state; break out as soon as it settles.
    for i in range(10):
        status = result.status
        print(f"第{i+1}秒: {status}")
        if result.ready():
            if result.successful():
                print(f"任务成功: {result.result}")
            else:
                print(f"任务失败: {result.info}")
            break
        time.sleep(1)
    else:
        # for/else: this branch runs only when the loop exhausted all 10
        # iterations without hitting `break`, i.e. the task never finished.
        print("任务超时或仍在执行")

if __name__ == "__main__":
    print("=== Celery任务监控 ===")
    check_worker_status()
    print("\n=== 测试任务提交 ===")
    test_task_submission()
3. Web界面调试功能
在Web界面中增加了调试信息面板,包括:
- 缓存后端配置
- Redis连接状态
- Worker状态检查
- 任务实时提交和状态监控
4. 实时任务状态API
增加了任务状态查询API:
javascript
// Poll the task-status API once per second until the task settles.
// FIX: the original only cleared the interval inside the .done() handler,
// so a failing request (network error, 500, expired session) left the
// timer polling forever; a .fail() handler now stops it as well.
function monitorTask(taskId) {
    var interval = setInterval(function() {
        $.get('/servermanager/api/task-status/', {task_id: taskId})
            .done(function(data) {
                updateTaskStatus(taskId, data);
                if (data.ready) {
                    clearInterval(interval);
                    // Refresh the per-target status panels with fresh data.
                    loadAllTargetStatus();
                }
            })
            .fail(function() {
                // Stop polling on request failure instead of looping forever.
                clearInterval(interval);
            });
    }, 1000); // check once per second
}
启动流程
1. 环境准备
bash
# 设置Redis环境变量
$env:DJANGO_REDIS_URL="127.0.0.1:6379"
# 安装依赖
pip install -r requirements.txt
# 数据库迁移
python manage.py migrate
2. 启动服务
bash
# 启动Redis(如果未启动)
redis-server
# 启动Celery Worker
celery -A djangoblog worker -l info --pool=threads --concurrency=4 -n worker1@%h -Q celery,email,search,seo,monitoring
# 启动Celery Beat(定时调度器)
celery -A djangoblog beat -l info
# 启动Django服务器
python manage.py runserver 8000
3. 创建定时任务
bash
python manage.py setup_network_monitoring
4. 访问监控界面
打开浏览器访问:http://localhost:8000/servermanager/network-monitor/
总结
通过这次实践,我们成功实现了一个完整的Django + Celery网络监控系统。主要收获包括:
- 架构设计:合理分离了任务调度、执行和结果展示
- 问题解决:掌握了Windows环境下Celery的配置技巧
- 调试方法:建立了完整的调试和监控体系
- 缓存管理:正确配置了跨进程的缓存共享
- 实时监控:实现了任务状态的实时展示
这个系统现在可以:
- 自动执行定时心跳检测(每分钟)
- 通过Web界面实时查看监控状态
- 手动触发检测任务并监控执行过程
- 提供完整的API接口供其他系统调用
- 支持历史数据查看和健康状况汇总
整个实现过程展示了现代Web应用中异步任务处理的最佳实践,为类似项目提供了完整的参考方案。