查看binlog设置
bash
mysql> show variables like 'log_%';
+----------------------------------------+----------------------------------------+
| Variable_name | Value |
+----------------------------------------+----------------------------------------+
| log_bin | ON |
| log_bin_basename | /var/lib/mysql/binlog |
| log_bin_index | /var/lib/mysql/binlog.index |
| log_bin_trust_function_creators | OFF |
| log_error | /var/log/mysqld.log |
| log_error_services | log_filter_internal; log_sink_internal |
| log_error_suppression_list | |
| log_error_verbosity | 2 |
| log_output | FILE |
| log_queries_not_using_indexes | OFF |
| log_raw | OFF |
| log_replica_updates | ON |
| log_slave_updates | ON |
| log_slow_admin_statements | OFF |
| log_slow_extra | OFF |
| log_slow_replica_statements | OFF |
| log_slow_slave_statements | OFF |
| log_statements_unsafe_for_binlog | ON |
| log_throttle_queries_not_using_indexes | 0 |
| log_timestamps | UTC |
+----------------------------------------+----------------------------------------+
20 rows in set (0.00 sec)
mysql>
解析binlog
https://blog.csdn.net/qifei_jia/article/details/126850753
解析binlog
bash
mysqlbinlog -d tobacco --start-datetime="2024-11-04 23:59:59" --stop-datetime="2024-11-05 0:10:00" --base64-output=decode-rows -v binlog.000120 > restore/20241105.sql
参数说明
- -d 指定数据库,只输出该库相关的事件
- --start-datetime 开始时间
- --stop-datetime 结束时间
- --base64-output=decode-rows 不输出base64编码的BINLOG语句,需配合 -v 将行事件解码为带注释的伪SQL
- -v 重新构建伪SQL语句的行信息输出,-v -v会增加列类型的注释信息
文件结果
bash
#241031 0:00:00 server id 1 end_log_pos 707179152 CRC32 0xdbee580b Delete_rows: table id 1645
# at 707179152
#241031 0:00:00 server id 1 end_log_pos 707185483 CRC32 0xe1384030 Delete_rows: table id 1645
# at 707185483
#241031 0:00:00 server id 1 end_log_pos 707188769 CRC32 0x27df4bf2 Delete_rows: table id 1645 flags: STMT_END_F
### DELETE FROM `tobacco`.`report_task`
### WHERE
### @1=1606422
### @2=1
### @3=2109744
### @4=51372
### @5='{"taskId":1606422,"uploadTime":null,"status":null,"userId":null,"infoId":null,"images":null,"tobaccoInfo":{"createBy":"51372"},"tobaccoAccount":{"createBy":"51372"}}'
### @6=0
### @7='http://e.tb.cn/h.gyXbVjsqdm8bbDO?tk=MrfS3o2y0OD'
### @8='2024-10-30 09:18:46'
### @9='2024-10-30 08:40:12'
### @10=NULL
### @11=NULL
### @12=b'1'
### @13=1
### @14='ExternalDataDTO(accountName=null)'
### @15='王XX'
### @16='2024-10-30 21:50:01'
### @17=NULL
### @18='0'
重构SQL
主要python代码
python
# -*- coding:utf-8 -*-
import logging
import os
import re
from utils.util_logfile import nlogger, traceback
class GetURLError(Exception):
    """Custom error type listed among the errors exec_func handles; not raised in the code shown here."""
class GetRowIterError(Exception):
    """Raised by get_row_object_iterator when parsing the decoded binlog lines fails."""
class GetStorageError(Exception):
    """Custom error type listed among the errors exec_func handles; not raised in the code shown here."""
class GetValidateError(Exception):
    """Custom error type; neither raised nor specifically handled in the code shown here."""
class DownloadVideoError(Exception):
    """Custom error type listed among the errors exec_func handles; not raised in the code shown here."""
class ThreadTaskError(Exception):
    """Custom error type listed among the errors exec_func handles; not raised in the code shown here."""
class WriteResultError(Exception):
    """Raised by write_result_to_file when generating or writing the SQL output fails."""
def exec_func(file_name, **kwargs):
    """
    Run the conversion pipeline for one decoded binlog file.

    The steps are: validate the path, read the file, collect the row
    objects, and write the result file. A progress marker is printed before
    each step. Every exception is logged through ``nlogger`` and reported
    with ``print``; nothing is re-raised to the caller.

    :param file_name: path handed to check_file_name
    :param kwargs: forwarded to check_file_name and get_row_object_iterator
    :return: None
    """
    # Project-specific error types that get the "Custom error" report.
    known_errors = (
        GetURLError,
        GetRowIterError,
        GetStorageError,
        DownloadVideoError,
        ThreadTaskError,
        WriteResultError,
    )
    try:
        print("### check_file_name")
        checked_path = check_file_name(file_name, **kwargs)

        print("### get_file_content")
        lines = get_file_content(checked_path)

        print("### get_row_object_iterator")
        rows = get_row_object_iterator(lines, **kwargs)

        print("### write_result_to_file")
        write_result_to_file(rows)
    except known_errors as err:
        nlogger.error('{fn} Custom error: {e}'.format(fn='exec_func', e=repr(err)))
        print(f'Custom error: {repr(err)}')
    except AssertionError as err:
        # Raised by the input validation in check_file_name.
        nlogger.error('{fn} Assertion error: {e}'.format(fn='exec_func', e=repr(err)))
        print(repr(err))
    except Exception as err:
        # Anything unexpected: log the full traceback, print a short summary.
        nlogger.error('{fn} error: {e}'.format(fn='exec_func', e=traceback.format_exc()))
        print(f'Undefined error: {repr(err)}')
def check_file_name(file_name, **kwargs):
    """
    Validate that ``file_name`` points at an existing path and return it
    with surrounding whitespace removed.

    :param file_name: path to check; converted with ``str()`` and stripped
        before the existence test
    :param kwargs: accepted for call-site compatibility; not used here
    :return: the stripped path string
    :raises AssertionError: if ``file_name`` is None or the path does not exist
    """
    # Raise AssertionError explicitly instead of using ``assert`` statements:
    # ``python -O`` strips asserts, which would silently disable these checks.
    # The exception type is unchanged, so existing handlers keep working.
    if file_name is None:
        raise AssertionError("Parameter file_name must be provided and is not None.")
    _file_name = str(file_name).strip()
    if not os.path.exists(_file_name):
        raise AssertionError("file_name {f} does not exists".format(f=_file_name))
    return _file_name
def get_file_content(file, encoding="utf-8"):
    """
    Read a text file and return its lines.

    :param file: path of the file to read
    :param encoding: text encoding; ``None`` falls back to "utf-8"
    :return: list of lines as produced by ``readlines()`` (line endings kept)
    :raises Exception: any error from opening or reading the file is logged
        with its traceback and re-raised unchanged
    """
    try:
        if encoding is None:
            encoding = "utf-8"
        with open(file, "r", encoding=encoding) as f:
            return f.readlines()
    except Exception:
        # Log under this function's own name so the entry can be traced back here.
        nlogger.error('{fn} error: {e}'.format(fn='get_file_content', e=traceback.format_exc()))
        raise
def get_row_object_iterator(content, **kwargs):
    """
    Collect the rows deleted from `tobacco`.`report_task` out of decoded
    mysqlbinlog output.

    A row starts at a "### DELETE FROM `tobacco`.`report_task`" line. The
    following "### @N=value" lines (N from 1 to 18) are stored under the
    column name for position N, and the row is complete once @18 is seen.
    Lines outside a row, and lines of any other shape, are ignored. A row
    whose @18 line never appears is dropped.

    Values are kept as the raw text after "=" (quotes included), except
    ``is_back`` (@12): the text between the first pair of single quotes is
    converted to int when it is numeric, and every other form falls back to 1.

    :param content: iterable of text lines
    :param kwargs: accepted for call-site compatibility; not used here
    :return: list of dicts, one per completed row
    :raises GetRowIterError: wraps any exception raised while parsing
    """
    # Position in the binlog row image (@N) -> key used in the row dict.
    column_keys = {
        "1": "task_id",
        "2": "status",
        "3": "report_id",
        "4": "reporter_id",
        "5": "data_json",
        "6": "task_type",
        "7": "url",
        "8": "upload_time",
        "9": "create_time",
        "10": "image_account",
        "11": "image_content",
        "12": "is_back",
        "13": "fail_times",
        "14": "remark",
        "15": "create_by",
        "16": "update_time",
        "17": "update_by",
        "18": "del_flag",
    }
    last_column = "del_flag"
    row_start = re.compile(r'^### DELETE FROM `tobacco`\.`report_task`')
    # mysqlbinlog -v indents column lines ("###   @1=..."), so accept any
    # amount of whitespace between "###" and "@N=".
    column_line = re.compile(r'^###\s+@(\d+)=')
    try:
        row_object_iterator = list()
        row_dict = None  # None means "not inside a row"
        for line in content:
            _line = str(line).strip()
            if row_start.match(_line):
                row_dict = dict()
                continue
            if row_dict is None:
                continue
            matched = column_line.match(_line)
            if matched is None:
                continue
            column = column_keys.get(matched.group(1))
            if column is None:
                continue
            # Slice off only the leading marker; the value itself may
            # legitimately contain the same text again.
            raw_value = _line[matched.end():]
            if column == "is_back":
                # b'1' splits into ["b", "1", ""]; take the middle part.
                try:
                    bit_text = raw_value.split("'")[1]
                    row_dict[column] = int(bit_text) if bit_text.isdigit() else 1
                except (IndexError, ValueError):
                    row_dict[column] = 1
            else:
                row_dict[column] = raw_value
            if column == last_column:
                row_object_iterator.append(row_dict)
                row_dict = None
        print("### create sql numbers: {n}".format(n=len(row_object_iterator)))
        return row_object_iterator
    except Exception as e:
        nlogger.error('{fn} error: {e}'.format(fn='get_row_object_iterator', e=traceback.format_exc()))
        raise GetRowIterError('{fn} error: {e}'.format(fn='get_row_object_iterator', e=repr(e)))
def write_result_to_file(row_object_iterator):
    """
    Write one INSERT statement per row dict to ``output.sql`` in the current
    working directory, overwriting any existing file.

    Each value is inserted into the statement as-is, so string values are
    expected to arrive already quoted. A column that is missing or None is
    written as NULL. Entries that are empty or not a dict are skipped.

    :param row_object_iterator: iterable of row dicts keyed by column name
    :return: None
    :raises WriteResultError: wraps any exception raised while writing
    """
    columns = (
        'task_id', 'status', 'report_id', 'reporter_id', 'data_json',
        'task_type', 'url', 'upload_time', 'create_time', 'image_account',
        'image_content', 'is_back', 'fail_times', 'remark', 'create_by',
        'update_time', 'update_by', 'del_flag',
    )
    # Adjacent string literals rather than backslash continuation inside one
    # literal, so source indentation can never leak into the generated SQL.
    template = (
        "INSERT `report_task`(`task_id`,`status`,`report_id`,`reporter_id`,`data_json`,"
        "`task_type`,`url`,`upload_time`,`create_time`, `image_account`,`image_content`,`is_back`,`fail_times`,"
        "`remark`,`create_by`,`update_time`,`update_by`,`del_flag` ) VALUES({task_id},{status},{report_id},"
        "{reporter_id},{data_json},{task_type},{url},{upload_time},{create_time},{image_account},{image_content},"
        "{is_back},{fail_times},{remark},{create_by},{update_time},{update_by},{del_flag});"
    )
    try:
        # Explicit UTF-8 so non-ASCII values are written the same way on
        # every platform, whatever the locale's default encoding is.
        with open('output.sql', 'w', encoding='utf-8') as f:
            for row_dict in row_object_iterator:
                if not row_dict or not isinstance(row_dict, dict):
                    continue
                # Substitute NULL per value. Replacing the text "None" in the
                # finished statement would also rewrite real data that
                # happens to contain that word.
                values = {}
                for column in columns:
                    value = row_dict.get(column)
                    values[column] = 'NULL' if value is None else value
                print(template.format(**values), file=f)
    except Exception as e:
        nlogger.error("{fn} error: {e}".format(fn='write_result_to_file', e=traceback.format_exc()))
        raise WriteResultError("{fn} error: {e}".format(fn='write_result_to_file', e=repr(e)))
if __name__ == "__main__":
    import sys

    # file_name = '/public/sj_app/20241101-1.sql'
    # Use the first command-line argument as the input path when one is
    # given, so the script does not have to be edited for every file;
    # without an argument the previous hard-coded default is used.
    file_name = sys.argv[1] if len(sys.argv) > 1 else '/public/sj_app/test.sql'
    kwargs = dict()
    exec_func(file_name, **kwargs)
查看SQL
sql
INSERT `report_task`(`task_id`,`status`,`report_id`,`reporter_id`,`data_json`, `task_type`,`url`,`upload_time`,`create_time`, `image_account`,`image_content`,`is_back`,`fail_times`, `remark`,`create_by`,`update_time`,`update_by`,`del_flag` ) VALUES(1609002,1,2112756, 38882,'{"taskId":1609002,"uploadTime":null,"status":null,"userId":null,"infoId":null,"images":null,"tobaccoInfo":{"createBy":"38882"},"tobaccoAccount":{"createBy":"38882"}}',0,'https://m.tb.cn/h.gy97GoE?tk=gNFU3odb2Oc')','王XX','2024-10-30 21:58:11',NULL,'0');
恢复数据
新建数据库
- 在独立的新库中恢复,防止误操作影响正式库
bash
mysql> create database tobacco_report_task;
新建同名表
sql
-- Select the scratch database for the restore.
-- The USE statement takes the database name directly; "USE database <name>" is a syntax error.
USE tobacco_report_task;
SET NAMES utf8mb4;
-- Foreign key checks are switched off while the table is recreated and switched back on at the end.
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for report_task
-- ----------------------------
-- Any existing report_task table in this database is dropped first.
DROP TABLE IF EXISTS `report_task`;
CREATE TABLE `report_task` (
  `task_id` int NOT NULL AUTO_INCREMENT COMMENT '任务id',
  `status` int NULL DEFAULT NULL COMMENT '状态,0:排队中,1:已解析成功,2:解析失败',
  `report_id` int NULL DEFAULT NULL COMMENT '信息id',
  `reporter_id` int NULL DEFAULT NULL COMMENT '人id',
  `data_json` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '数据信息内容',
  `task_type` int NULL DEFAULT NULL COMMENT 'url 0,图片 1',
  `url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '链接',
  `upload_time` datetime NULL DEFAULT NULL COMMENT '解析截至时间',
  `create_time` datetime NULL DEFAULT NULL COMMENT '创建时间',
  `image_account` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '作者图片地址',
  `image_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '聊天内容图片地址',
  `is_back` bit(1) NULL DEFAULT NULL COMMENT '是否返回',
  `fail_times` int NULL DEFAULT NULL COMMENT '失败次数',
  `remark` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '备注',
  `create_by` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '创建人',
  `update_time` datetime NULL DEFAULT NULL COMMENT '更新时间',
  `update_by` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '更新人',
  `del_flag` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '删除标识 0 正常 2 删除',
  PRIMARY KEY (`task_id`) USING BTREE,
  INDEX `report_task_url`(`url` ASC) USING BTREE,
  INDEX `report_task_report_id`(`report_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2260849 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;
恢复数据
bash
mysql> use tobacco_report_task;
mysql> source output_20241101.sql;
后续
查验数据无误后,导入正式库表。