一、背景
最近遇到了 MySQL 大事务的问题,但是登录服务器查看的时候大事务已经执行结束了,无法再直接观察到它。因为业务代码里都是显式提交的,所以可以通过解析 binlog 来找出大事务。
二、处理
1.解析binlog
首先我们可以根据报错时间或者日志缩小binlog的范围
powershell
mysqlbinlog --base64-output=decode-rows -vv --include-gtids="7b81cda8-3206-11ee-8b54-1070fd7d5a4a:4613840380-4613849380" mysql-bin.010164 >mysql_binlog_res.txt
2.解析大事务
然后用下面的脚本分析上一步导出的文本文件,最终找出大事务
powershell
python3 big_txn_binlog.py mysql_binlog_res.txt
- 输出结果如下
powershell
共发现 1 个大事务
=== 大事务 #1 ===
状态:已提交
起始方式:显式 BEGIN
文本行号:3026147 - 3171783
开始位点:334471183
结束位点:335204044
开始时间:260421 18:45:30
结束时间:260421 18:45:56
影响行数:2272
涉及表:******
- 脚本
python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import re
def _new_transaction(start_pos, start_time, start_line, implicit=False):
return {
'start_pos': start_pos,
'start_time': start_time,
'start_line': start_line,
'rows': 0,
'tables': set(),
'implicit': implicit,
'status': '进行中'
}
def _close_transaction(transactions, current_txn, end_pos, end_time, end_line, row_threshold, status):
if current_txn is None:
return None
current_txn['end_pos'] = end_pos
current_txn['end_time'] = end_time
current_txn['end_line'] = end_line
current_txn['status'] = status
if current_txn['rows'] >= row_threshold:
transactions.append(current_txn)
return None
def analyze_binlog_large_transactions(txt_file, row_threshold=1000):
    """Find large transactions in a text dump written by mysqlbinlog.

    The file is scanned line by line. A transaction starts at a BEGIN line
    (or implicitly at the first row change seen outside a transaction) and
    ends at an event header carrying ``Xid = N`` or at a COMMIT / ROLLBACK /
    XA COMMIT / XA ROLLBACK line. Each ``### INSERT INTO`` / ``### UPDATE`` /
    ``### DELETE FROM`` pseudo-SQL line counts as one affected row.

    Args:
        txt_file: Path of the text file to analyze.
        row_threshold: Minimum number of row changes for a transaction to
            be reported.

    Returns:
        A list of transaction dicts in file order, each with start/end
        position, time and line number, row count, table names, whether it
        started implicitly, and a status text.

        For a transaction cut off by a new BEGIN or by the end of the
        file, the end position and time are those of the last event header
        read before that point.
    """
    transactions = []
    current_txn = None
    current_pos = "0"
    current_end_pos = "0"
    current_time = ""

    pos_pattern = re.compile(r'^# at (\d+)')
    time_pattern = re.compile(r'^#(\d{6})\s+(\d{1,2}:\d{2}:\d{2})')
    end_pos_pattern = re.compile(r'end_log_pos (\d+)')
    xid_pattern = re.compile(r'\bXid = \d+\b')
    row_change_pattern = re.compile(
        r'^###\s*(INSERT INTO|UPDATE|DELETE FROM)\s+'
        r'(?:(?:`(?P<db_bt>[^`]+)`|(?P<db_plain>\w+))\.)?'
        r'(?:`(?P<table_bt>[^`]+)`|(?P<table_plain>\w+))'
    )
    rollback_prefixes = ('ROLLBACK', 'XA ROLLBACK')
    end_prefixes = ('COMMIT', 'XA COMMIT') + rollback_prefixes

    last_line_no = 0
    with open(txt_file, 'r', encoding='utf-8', errors='replace') as infile:
        for line_no, raw_line in enumerate(infile, 1):
            last_line_no = line_no
            line = raw_line.rstrip('\r\n')

            # Decoded row images. They are handled first and never fall
            # through, so column values that happen to contain text such as
            # "Xid = 1" or "end_log_pos 5" cannot be mistaken for event
            # metadata.
            if line.startswith('###'):
                row_match = row_change_pattern.match(line)
                if row_match:
                    if current_txn is None:
                        # Row change without a preceding BEGIN.
                        current_txn = _new_transaction(
                            current_pos, current_time, line_no, implicit=True
                        )
                    db_name = row_match.group('db_bt') or row_match.group('db_plain')
                    table_name = row_match.group('table_bt') or row_match.group('table_plain')
                    current_txn['rows'] += 1
                    if db_name:
                        current_txn['tables'].add("{}.{}".format(db_name, table_name))
                    else:
                        current_txn['tables'].add(table_name)
                continue

            # Event start position ("# at N").
            pos_match = pos_pattern.match(line)
            if pos_match:
                current_pos = pos_match.group(1)
                continue

            # Event header line: timestamp, end_log_pos and, for the commit
            # of a transaction, "Xid = N". These fields are only trusted
            # here, not in statement text.
            time_match = time_pattern.match(line)
            if time_match:
                current_time = "{} {}".format(time_match.group(1), time_match.group(2))
                end_pos_match = end_pos_pattern.search(line)
                if end_pos_match:
                    current_end_pos = end_pos_match.group(1)
                if current_txn is not None and xid_pattern.search(line):
                    current_txn = _close_transaction(
                        transactions,
                        current_txn,
                        current_end_pos,
                        current_time,
                        line_no,
                        row_threshold,
                        '已提交'
                    )
                continue

            stripped = line.strip()

            # Transaction start.
            if stripped.startswith('BEGIN'):
                if current_txn is not None and current_txn['rows'] > 0:
                    _close_transaction(
                        transactions,
                        current_txn,
                        current_end_pos,
                        current_time,
                        line_no - 1,
                        row_threshold,
                        '遇到新的 BEGIN,上一事务未闭合'
                    )
                # Always start a fresh record. A pending record with no
                # rows (e.g. opened by a BEGIN line inside a stored-routine
                # body) must not lend its stale start to this transaction.
                current_txn = _new_transaction(current_pos, current_time, line_no, implicit=False)
                continue

            # Transaction end. "ROLLBACK TO <savepoint>" only undoes part
            # of the transaction and leaves it open, so it is skipped.
            if current_txn is None or stripped.startswith('ROLLBACK TO'):
                continue
            if stripped.startswith(end_prefixes):
                if stripped.startswith(rollback_prefixes):
                    status = '已回滚'
                else:
                    status = '已提交'
                current_txn = _close_transaction(
                    transactions,
                    current_txn,
                    current_end_pos,
                    current_time,
                    line_no,
                    row_threshold,
                    status
                )

    # The file ended in the middle of a transaction that changed rows.
    if current_txn is not None and current_txn['rows'] > 0:
        _close_transaction(
            transactions,
            current_txn,
            current_end_pos,
            current_time,
            last_line_no,
            row_threshold,
            '文件结束,事务未闭合'
        )
    return transactions
def main():
    """Command-line entry point: parse arguments, analyze, print a report.

    Takes the path of the mysqlbinlog text dump and an optional
    ``--row-threshold`` (default 1000), then prints one section per large
    transaction found. If the file cannot be read, the error is reported
    through argparse (usage message on stderr, exit status 2) instead of a
    traceback.
    """
    parser = argparse.ArgumentParser(description='分析 mysqlbinlog 导出的文本文件中的大事务')
    parser.add_argument('txt_file', help='mysqlbinlog 导出的文本文件路径')
    parser.add_argument(
        '--row-threshold',
        type=int,
        default=1000,
        help='大事务行数阈值,默认 1000'
    )
    args = parser.parse_args()

    try:
        txns = analyze_binlog_large_transactions(args.txt_file, row_threshold=args.row_threshold)
    except OSError as exc:
        # Missing or unreadable input is a usage problem, not a crash.
        parser.error('无法读取文件 {}: {}'.format(args.txt_file, exc))

    print("共发现 {} 个大事务\n".format(len(txns)))
    for i, txn in enumerate(txns, 1):
        print("=== 大事务 #{} ===".format(i))
        print(" 状态:{}".format(txn['status']))
        print(" 起始方式:{}".format('隐式开始' if txn['implicit'] else '显式 BEGIN'))
        print(" 文本行号:{} - {}".format(txn['start_line'], txn['end_line']))
        print(" 开始位点:{}".format(txn['start_pos']))
        print(" 结束位点:{}".format(txn['end_pos']))
        print(" 开始时间:{}".format(txn['start_time']))
        print(" 结束时间:{}".format(txn['end_time']))
        print(" 影响行数:{}".format(txn['rows']))
        print(" 涉及表:{}".format(', '.join(sorted(txn['tables']))))
        print()
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()