Nginx访问日志分析与处理
用 Python 实现 Nginx 日志的解析、筛选、存储与回读全流程
一、日志格式
1.224.235.228 - - [06/May/2025:14:47:15 +0000] "GET /login HTTP/1.1" 500 1665 "http://example.com/api/data" "PostmanRuntime/7.28.4"
按空格拆分,关键字段索引:
| 字段 | 索引 | 处理 |
|---|---|---|
| IP | [0] | 直接取 |
| 时间 | [3] | 去掉开头 [ |
| 方法 | [5] | 去掉开头 " |
| 路径 | [6] | 直接取 |
| 状态码 | [8] | 直接取 |
二、时间筛选
python
import datetime
def log_filter_by_time(log_file, start_time, end_time):
    """Return the access-log entries whose timestamp lies in a time window.

    Every line is split on whitespace and the fields are picked by position:
    [0] client IP, [3] timestamp (leading '[' removed), [5] HTTP method
    (leading '"' removed), [6] request path, [8] status code.

    Blank lines and lines that do not follow this layout (fewer than nine
    fields, or a timestamp that cannot be parsed) are skipped, so one bad
    line no longer aborts the whole scan.

    Args:
        log_file: Path of the access log; it is read as UTF-8.
        start_time: Naive ``datetime.datetime`` lower bound (inclusive).
        end_time: Naive ``datetime.datetime`` upper bound (inclusive).

    Returns:
        A list of dicts with the keys 'IP', 'Time', 'Method', 'Path' and
        'Stats_Code'. All values are strings copied from the log line.
    """
    time_format = '%d/%b/%Y:%H:%M:%S'
    result = []
    with open(log_file, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.split()
            # Index 8 (status code) is the highest index read below.
            if len(parts) < 9:
                continue
            raw_time = parts[3][1:]
            try:
                # The UTC offset is a separate field (parts[4]) and is not
                # taken into account by the comparison.
                log_time = datetime.datetime.strptime(raw_time, time_format)
            except ValueError:
                continue
            if start_time <= log_time <= end_time:
                result.append({
                    'IP': parts[0],
                    'Time': raw_time,
                    'Method': parts[5][1:],
                    'Path': parts[6],
                    'Stats_Code': parts[8],
                })
    return result
三、JSON存储与回读
python
import json
def save_data_to_json(result, json_file):
    """Write ``result`` to ``json_file`` as indented UTF-8 JSON.

    The data is serialized to a string before the target file is opened.
    If serialization fails, an already existing file is therefore left
    untouched instead of being truncated or partially overwritten.

    Args:
        result: JSON-serializable object to store.
        json_file: Path of the output file; it is created or overwritten.

    Raises:
        TypeError: If ``result`` contains an object ``json`` cannot encode.
    """
    # indent=2 keeps the file readable; ensure_ascii=False writes non-ASCII
    # characters as-is instead of \uXXXX escapes.
    payload = json.dumps(result, indent=2, ensure_ascii=False)
    with open(json_file, 'w', encoding='utf-8') as file:
        file.write(payload)
    print(f"数据已保存至 {json_file}")
def load_data_to_python(json_file):
    """Read ``json_file`` (UTF-8) and return the decoded Python object.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The Python object that the JSON document decodes to.
    """
    with open(json_file, 'r', encoding='utf-8') as handle:
        text = handle.read()
        return json.loads(text)
参数说明:
- `indent=2` —— 格式化输出,方便阅读
- `ensure_ascii=False` —— 正确显示中文
四、程序入口
python
if __name__ == '__main__':
    # Demo run: filter the log to the window 09 May 2025 15:00:00-15:30:00,
    # store the matches as JSON, then read the file back and print each entry.
    time_format = '%d/%b/%Y:%H:%M:%S'
    log_file = './nginx_access.log'
    json_file = './xxx.json'

    start_time = datetime.datetime.strptime('09/May/2025:15:00:00', time_format)
    end_time = datetime.datetime.strptime('09/May/2025:15:30:00', time_format)

    log_filter = log_filter_by_time(log_file, start_time, end_time)
    save_data_to_json(log_filter, json_file)

    for entry in load_data_to_python(json_file):
        print(entry)
五、源码

python
"""
1.224.235.228 - - [06/May/2025:14:47:15 +0000] "GET /login HTTP/1.1" 500 1665 "http://example.com/api/data" "PostmanRuntime/7.28.4"
拆解索引位置:
IP -> 0 -> [0]
Time -> 3 -> [3][1:]
Method -> 5 -> [5][1:]
Path -> 6 -> [6]
Stats_Code -> 8 -> [8]
"Nginx 访问日志时间筛选工具"
"""
import json
import datetime
# 定义日志筛选函数 --> 筛选出符合要求的信息
def log_filter_by_time(log_file, start_time, end_time):
    """Collect the log entries whose timestamp falls inside a time window.

    Lines are split on whitespace and fields are taken by position:
    [0] client IP, [3] timestamp without its leading '[', [5] HTTP method
    without its leading '"', [6] request path, [8] status code.

    Lines that cannot be interpreted this way (blank lines, fewer than nine
    fields, unparsable timestamp) are skipped rather than raising, so a
    single bad line does not abort processing of the file.

    Args:
        log_file: Path of the access log; it is read as UTF-8.
        start_time: Naive ``datetime.datetime`` lower bound (inclusive).
        end_time: Naive ``datetime.datetime`` upper bound (inclusive).

    Returns:
        A list of dicts with the keys 'IP', 'Time', 'Method', 'Path' and
        'Stats_Code'. All values are strings copied from the log line.
    """
    time_format = '%d/%b/%Y:%H:%M:%S'
    min_fields = 9  # the status code at index 8 is the last field used
    result = []
    with open(log_file, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.split()
            if len(parts) < min_fields:
                continue
            time_text = parts[3][1:]
            try:
                # Only the local date/time part is parsed; the offset field
                # that follows it (parts[4]) is ignored.
                log_time = datetime.datetime.strptime(time_text, time_format)
            except ValueError:
                continue
            if not (start_time <= log_time <= end_time):
                continue
            result.append({
                'IP': parts[0],
                'Time': time_text,
                'Method': parts[5][1:],
                'Path': parts[6],
                'Stats_Code': parts[8],
            })
    return result
# 定义信息存储函数 --> 将筛选出的Python列表-->Json数据文件
def save_data_to_json(result, json_file):
    """Store ``result`` in ``json_file`` as indented UTF-8 JSON.

    Serialization happens before the output file is opened for writing, so
    a failure to encode ``result`` cannot truncate or half-overwrite a file
    that already exists at ``json_file``.

    Args:
        result: JSON-serializable object to store.
        json_file: Path of the output file; it is created or overwritten.

    Raises:
        TypeError: If ``result`` contains an object ``json`` cannot encode.
    """
    # indent=2 pretty-prints; ensure_ascii=False keeps non-ASCII text
    # readable instead of escaping it as \uXXXX.
    text = json.dumps(result, indent=2, ensure_ascii=False)
    with open(json_file, 'w', encoding='utf-8') as file:
        file.write(text)
    print(f"恭喜,数据已经保存至{json_file}文件中,你可以去查看一下!")
# 定义Json读取函数 --> 将Json数据转换成Python对象
def load_data_to_python(json_file):
    """Open ``json_file`` as UTF-8 and return its decoded JSON content.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The Python object that the JSON document decodes to.
    """
    with open(json_file, 'r', encoding='utf-8') as handle:
        return json.load(handle)
# 定义程序入口
if __name__ == '__main__':
    # Script entry: select the entries logged on 09 May 2025 between
    # 15:00:00 and 15:30:00, save them as JSON, then reload and print them.
    stamp_format = '%d/%b/%Y:%H:%M:%S'
    parse_stamp = datetime.datetime.strptime

    log_file = './nginx_access.log'
    json_path = './xxx.json'
    start_time = parse_stamp('09/May/2025:15:00:00', stamp_format)
    end_time = parse_stamp('09/May/2025:15:30:00', stamp_format)

    log_filter = log_filter_by_time(log_file, start_time, end_time)
    save_data_to_json(log_filter, json_path)

    log_result = load_data_to_python(json_path)
    for record in log_result:
        print(record)

