最近项目需要把 MySQL 数据库迁移到 PostgreSQL,但表已经很大了,导出备份之后如果手工逐条修改 SQL,实在太麻烦。
所以这里记录一下:用 Python 脚本把导出的 MySQL SQL 直接转换为 PostgreSQL SQL。两者的基础语句差不多,转换并不难;如果没有视图、触发器、外键及外键索引等对象,只需处理好自增主键、注释这几处即可。
注意:这里把自增主键(AUTO_INCREMENT)改成了 SERIAL(BIGINT 类型的自增列改为 BIGSERIAL)。
话不多说,直接上代码。
环境是:python 3.8.9
python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
MySQL转PostgreSQL SQL转换器
使用字符串解析方式,不依赖第三方库
"""
import re
import argparse
import os
# MySQL base types whose PostgreSQL spelling does not depend on the declared
# length/precision. Keys are upper-case base type names.
_FIXED_TYPE_MAP = {
    # Integers
    'SMALLINT': 'SMALLINT',
    'MEDIUMINT': 'INTEGER',
    'INT': 'INTEGER',
    'INTEGER': 'INTEGER',
    'BIGINT': 'BIGINT',
    # Floating point
    'FLOAT': 'REAL',
    'DOUBLE': 'DOUBLE PRECISION',
    # Character data
    'TEXT': 'TEXT',
    'TINYTEXT': 'TEXT',
    'MEDIUMTEXT': 'TEXT',
    'LONGTEXT': 'TEXT',
    # Binary data. BINARY/VARBINARY have no PostgreSQL counterpart of the
    # same name, so they are mapped to BYTEA like the BLOB family.
    'BLOB': 'BYTEA',
    'TINYBLOB': 'BYTEA',
    'MEDIUMBLOB': 'BYTEA',
    'LONGBLOB': 'BYTEA',
    'BINARY': 'BYTEA',
    'VARBINARY': 'BYTEA',
    # Date and time
    'DATETIME': 'TIMESTAMP',
    'DATE': 'DATE',
    'TIME': 'TIME',
    'TIMESTAMP': 'TIMESTAMP',
    'YEAR': 'INTEGER',
    # ENUM / SET are stored as plain strings
    'ENUM': 'VARCHAR(50)',
    'SET': 'VARCHAR(255)',
    # Already PostgreSQL-style
    'SERIAL': 'SERIAL',
    'BIGSERIAL': 'BIGSERIAL',
}


def _convert_char_type(pg_name, params, max_length, default):
    """Convert a CHAR/VARCHAR declaration, falling back to TEXT when too long."""
    if not params:
        return default
    try:
        length = int(params.strip())
    except ValueError:
        # Not a plain integer length: keep the parameters untouched.
        return f'{pg_name}({params})'
    return 'TEXT' if length > max_length else f'{pg_name}({params})'


def convert_data_type(mysql_type):
    """Map a MySQL column type to a PostgreSQL column type.

    Args:
        mysql_type: The type as written in the MySQL DDL, optionally with
            parenthesised parameters (e.g. ``"varchar(255)"``). Matching is
            case-insensitive.

    Returns:
        The PostgreSQL type as a string. ``TINYINT(1)`` becomes ``BOOLEAN``,
        ``VARCHAR`` longer than 2000 and ``CHAR`` longer than 255 become
        ``TEXT``, and BLOB/BINARY/VARBINARY types become ``BYTEA``. A type
        that is not recognised is returned exactly as it was passed in.
    """
    mysql_type_upper = mysql_type.upper().strip()
    # Split the declaration into a base name and optional "(params)".
    type_match = re.match(r'(\w+)(?:\(([^)]+)\))?', mysql_type_upper)
    if not type_match:
        return mysql_type
    base_type = type_match.group(1)
    params = type_match.group(2)

    if base_type == 'TINYINT':
        # TINYINT(1) is the conventional MySQL spelling of a boolean.
        return 'BOOLEAN' if params and params.strip() == '1' else 'SMALLINT'
    if base_type == 'DECIMAL':
        return f'DECIMAL({params})' if params else 'DECIMAL'
    if base_type == 'VARCHAR':
        return _convert_char_type('VARCHAR', params, 2000, 'VARCHAR(255)')
    if base_type == 'CHAR':
        return _convert_char_type('CHAR', params, 255, 'CHAR(1)')
    return _FIXED_TYPE_MAP.get(base_type, mysql_type)
def remove_backticks(text):
    """Return *text* with every MySQL backtick character deleted."""
    return ''.join(text.split('`'))
def escape_quotes(text):
    r"""Rewrite MySQL backslash-escaped single quotes as doubled quotes.

    MySQL dumps escape a single quote as ``\'``; the SQL-standard spelling
    is ``''``. Escape sequences are consumed pairwise from left to
    right, so an escaped backslash followed by a closing quote (``\\'``) is
    not mistaken for an escaped quote.

    Args:
        text: SQL text that may contain MySQL-style escape sequences.

    Returns:
        *text* with every ``\'`` sequence replaced by ``''``. All other
        backslash sequences are left unchanged.
    """
    def _rewrite(match):
        # Only the escaped quote is rewritten; any other escaped character
        # (including an escaped backslash) is kept as written.
        return "''" if match.group(1) == "'" else match.group(0)

    return re.sub(r"\\([\s\S])", _rewrite, text)
def convert_hex_to_bytea(text):
    r"""Convert MySQL hexadecimal literals to PostgreSQL BYTEA hex strings.

    MySQL:      0xE696B0E78988
    PostgreSQL: '\xE696B0E78988'

    A literal is converted only when it stands alone as a value: it must be
    preceded by ``,``, ``(``, ``=`` or whitespace and followed by ``,``,
    ``)``, ``;``, whitespace or the end of the text. Text inside
    single-quoted string literals is never touched, so data that merely
    contains something looking like ``0x12`` is preserved.

    Args:
        text: SQL text to scan.

    Returns:
        *text* with each stand-alone ``0x...`` literal replaced by a quoted
        ``'\x...'`` string. A literal with an odd number of digits is padded
        with a leading ``0`` so the result has an even number of digits.
    """
    pattern = (
        # A complete single-quoted string (with \x escapes or '' inside):
        # matched first so that its contents are skipped as a unit.
        r"'(?:[^'\\]|\\[\s\S]|'')*'"
        # A stand-alone hexadecimal literal; group 1 holds the digits.
        r"|(?<=[,(=\s])0x([0-9a-fA-F]+)(?=[,)\s;]|$)"
    )

    def replace_hex(match):
        hex_value = match.group(1)
        if hex_value is None:
            # The match was a string literal: keep it verbatim.
            return match.group(0)
        if len(hex_value) % 2:
            hex_value = '0' + hex_value
        return f"'\\x{hex_value}'"

    return re.sub(pattern, replace_hex, text)
def _find_closing_paren(sql, start):
    """Return the index of the ')' that closes the '(' at *start*, or -1.

    Parentheses inside single- or double-quoted strings are ignored, and a
    backslash inside a string escapes the character that follows it.
    """
    if start < 0:
        return -1
    depth = 0
    quote = None
    i = start
    length = len(sql)
    while i < length:
        char = sql[i]
        if quote is not None:
            if char == '\\' and i + 1 < length:
                i += 2
                continue
            if char == quote:
                quote = None
        elif char in ("'", '"'):
            quote = char
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1


def convert_create_table(mysql_sql):
    """Convert one MySQL ``CREATE TABLE`` statement to PostgreSQL DDL.

    Args:
        mysql_sql: A single ``CREATE TABLE`` statement without the trailing
            semicolon.

    Returns:
        A ``(ddl, foreign_keys)`` tuple. ``ddl`` is the generated text:
        ``DROP TABLE IF EXISTS``, ``CREATE TABLE``, the ``CREATE [UNIQUE]
        INDEX`` statements, and the ``COMMENT ON`` statements. It also
        carries a ``-- TODO`` line for every FULLTEXT/SPATIAL index or
        non-foreign-key constraint that is not converted.
        ``foreign_keys`` is a list of ``ALTER TABLE ... ADD FOREIGN KEY``
        statements to run after all tables exist. ``("", [])`` is returned
        when the table name or the column list cannot be located.
    """
    table_match = re.search(
        r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?\s*\(',
        mysql_sql,
        re.IGNORECASE
    )
    if not table_match:
        return "", []
    table_name = table_match.group(1)

    # Locate the parenthesised column list.
    paren_start = mysql_sql.find('(')
    paren_end = _find_closing_paren(mysql_sql, paren_start)
    if paren_end == -1:
        return "", []

    # The table comment lives in the table options after the column list.
    # Searching only there keeps column comments out of the way and does not
    # require COMMENT to be the last option.
    table_comment = None
    table_comment_match = re.search(
        r"\bCOMMENT\s*=?\s*'((?:[^'\\]|\\[\s\S]|'')*)'",
        mysql_sql[paren_end + 1:],
        re.IGNORECASE
    )
    if table_comment_match:
        table_comment = table_comment_match.group(1).replace("\\'", "''")

    columns = []
    comment_lines = []
    primary_key_cols = []      # table-level PRIMARY KEY (...) column lists
    column_primary_keys = []   # columns flagged as key by parse_column
    unique_keys = []
    index_keys = []
    foreign_keys = []
    unsupported = []           # definitions reported as TODO, not converted

    for col_def in split_column_defs(mysql_sql[paren_start + 1:paren_end]):
        col_def = col_def.strip()
        if not col_def:
            continue
        col_def_upper = col_def.upper()

        # Table-level PRIMARY KEY
        if col_def_upper.startswith('PRIMARY KEY'):
            pk_match = re.search(r'PRIMARY\s+KEY\s*\(([^)]+)\)', col_def, re.IGNORECASE)
            if pk_match:
                primary_key_cols.append(remove_backticks(pk_match.group(1)))
            continue

        # UNIQUE KEY / UNIQUE INDEX
        if col_def_upper.startswith(('UNIQUE KEY', 'UNIQUE INDEX')):
            uk_match = re.search(
                r'(?:UNIQUE\s+(?:KEY|INDEX))\s+`?(\w+)`?\s*\(([^)]+)\)',
                col_def,
                re.IGNORECASE
            )
            if uk_match:
                unique_keys.append((uk_match.group(1), remove_backticks(uk_match.group(2))))
            continue

        # Plain KEY / INDEX
        if col_def_upper.startswith(('KEY ', 'INDEX ')):
            idx_match = re.search(r'(?:KEY|INDEX)\s+`?(\w+)`?\s*\(([^)]+)\)', col_def, re.IGNORECASE)
            if idx_match:
                index_keys.append((idx_match.group(1), remove_backticks(idx_match.group(2))))
            continue

        # FULLTEXT / SPATIAL indexes have no direct equivalent here; report
        # them instead of letting them be parsed as a column.
        if re.match(r'(?:FULLTEXT|SPATIAL)\s+(?:KEY|INDEX)\b', col_def_upper):
            unsupported.append(col_def)
            continue

        # Constraints. Only a definition that *starts* as a constraint is
        # treated as one, so a column whose comment merely mentions
        # "FOREIGN KEY" is still parsed as a column.
        if re.match(r'(?:CONSTRAINT|FOREIGN\s+KEY)\b', col_def_upper):
            fk_match = re.search(
                r'FOREIGN\s+KEY\s*\(([^)]+)\)\s*REFERENCES\s+`?(\w+)`?\s*\(([^)]+)\)',
                col_def,
                re.IGNORECASE
            )
            if fk_match:
                foreign_keys.append((
                    remove_backticks(fk_match.group(1)),
                    fk_match.group(2),
                    remove_backticks(fk_match.group(3)),
                ))
            else:
                unsupported.append(col_def)
            continue

        # Ordinary column
        parsed = parse_column(col_def)
        if parsed:
            columns.append(parsed['definition'])
            if parsed['comment']:
                comment_lines.append(f"COMMENT ON COLUMN {table_name}.{parsed['name']} IS '{parsed['comment']}';")
            if parsed.get('has_primary_key'):
                column_primary_keys.append(parsed['name'])

    # An explicit table-level PRIMARY KEY is authoritative (it may be
    # composite); column-level flags are only used when none was declared.
    if primary_key_cols:
        pk_columns = [pk.strip() for pk in primary_key_cols[0].split(',')]
    else:
        pk_columns = column_primary_keys
    is_composite_pk = len(pk_columns) > 1

    # A single-column key is declared inline on its column; a composite key
    # is appended as a table-level constraint.
    column_defs_final = []
    for col in columns:
        needs_inline_pk = (
            bool(pk_columns)
            and not is_composite_pk
            and col.split()[0] in pk_columns
            and 'PRIMARY KEY' not in col.upper()
        )
        column_defs_final.append(col + ' PRIMARY KEY' if needs_inline_pk else col)
    if is_composite_pk:
        pk_cols_str = ', '.join(pk_columns)
        column_defs_final.append(f'PRIMARY KEY ({pk_cols_str})')

    result_lines = [
        f"DROP TABLE IF EXISTS {table_name};",
        f"CREATE TABLE {table_name} (",
        ",\n".join(f" {col}" for col in column_defs_final),
        ");",
        "",
    ]
    for idx_name, idx_cols in unique_keys:
        result_lines.append(f"CREATE UNIQUE INDEX idx_{table_name}_{idx_name} ON {table_name} ({idx_cols});")
    for idx_name, idx_cols in index_keys:
        result_lines.append(f"CREATE INDEX idx_{table_name}_{idx_name} ON {table_name} ({idx_cols});")
    for definition in unsupported:
        # Collapse whitespace so the definition stays on one comment line.
        flat = ' '.join(remove_backticks(definition).split())
        result_lines.append(f"-- TODO: 未转换的约束/索引,请手动处理: {flat}")

    # Foreign keys are returned separately so the caller can add them after
    # every table has been created.
    foreign_key_statements = [
        f"ALTER TABLE {table_name} ADD FOREIGN KEY ({fk_local}) REFERENCES {fk_ref_table}({fk_ref_cols});"
        for fk_local, fk_ref_table, fk_ref_cols in foreign_keys
    ]

    if table_comment:
        result_lines.append(f"COMMENT ON TABLE {table_name} IS '{table_comment}';")
    result_lines.extend(comment_lines)
    return "\n".join(result_lines) + "\n", foreign_key_statements
def split_column_defs(columns_section):
    """Split the body of a ``CREATE TABLE (...)`` into its definitions.

    The text is split on commas that are outside parentheses and outside
    quoted strings. Inside a string a backslash escapes the character that
    follows it, so a string ending in an escaped backslash (``'\\\\'``) is
    closed correctly.

    Args:
        columns_section: The text between the outer parentheses of a
            ``CREATE TABLE`` statement.

    Returns:
        A list of the non-empty definitions, each stripped of surrounding
        whitespace, in their original order.
    """
    columns = []
    depth = 0
    quote = None      # the quote character of the string being scanned
    piece_start = 0   # index where the current definition begins
    i = 0
    length = len(columns_section)
    while i < length:
        char = columns_section[i]
        if quote is not None:
            if char == '\\' and i + 1 < length:
                # Skip the escaped character together with its backslash.
                i += 2
                continue
            if char == quote:
                quote = None
        elif char in ("'", '"'):
            quote = char
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        elif char == ',' and depth == 0:
            piece = columns_section[piece_start:i].strip()
            if piece:
                columns.append(piece)
            piece_start = i + 1
        i += 1

    tail = columns_section[piece_start:].strip()
    if tail:
        columns.append(tail)
    return columns
def parse_column(col_def):
    """Parse one MySQL column definition into its PostgreSQL form.

    Args:
        col_def: A single column definition taken from a ``CREATE TABLE``
            body, e.g. ``"`id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'x'"``.

    Returns:
        ``None`` when no column name or type can be read. Otherwise a dict
        with the keys:

        * ``name`` - the column name without backticks;
        * ``definition`` - the PostgreSQL column definition (name, type,
          ``NOT NULL``/``NULL``, ``DEFAULT``, ``UNIQUE``); ``PRIMARY KEY``
          is never included here;
        * ``comment`` - the column comment with ``\\'`` rewritten to ``''``,
          or ``None``;
        * ``has_primary_key`` - ``True`` when the column is
          ``AUTO_INCREMENT`` or carries an inline ``PRIMARY KEY``.
    """
    col_def = remove_backticks(col_def.strip())

    # Pull the comment out first: it can contain arbitrary text that would
    # otherwise confuse the keyword searches below.
    comment = None
    comment_match = re.search(r"COMMENT\s*'(.*?)'(?:\s*$|\s*USING|$)", col_def, re.IGNORECASE)
    if comment_match:
        comment = comment_match.group(1).replace("\\'", "''")
        col_def = col_def[:comment_match.start()] + col_def[comment_match.end():]

    # Column name: the first word.
    name_match = re.match(r'^(\w+)', col_def)
    if not name_match:
        return None
    col_name = name_match.group(1)

    # Data type: a word optionally followed by "(params)".
    rest = col_def[len(col_name):].strip()
    type_match = re.match(r"^(\w+)(?:\(([^)]*)\))?", rest, re.IGNORECASE)
    if not type_match:
        return None
    type_base = type_match.group(1)
    type_params = type_match.group(2)
    mysql_type = f"{type_base}({type_params})" if type_params else type_base
    pg_type = convert_data_type(mysql_type)
    rest = rest[type_match.end():].strip()

    # AUTO_INCREMENT columns become SERIAL / BIGSERIAL.
    is_auto_increment = 'AUTO_INCREMENT' in rest.upper()
    if is_auto_increment:
        rest = re.sub(r'AUTO_INCREMENT', '', rest, flags=re.IGNORECASE).strip()
        pg_type = 'BIGSERIAL' if 'BIGINT' in pg_type.upper() else 'SERIAL'

    not_null = bool(re.search(r'\bNOT\s+NULL\b', rest, re.IGNORECASE))
    is_null = bool(re.search(r'\bNULL\b', rest, re.IGNORECASE)) and not not_null

    # DEFAULT value: bit literal, quoted string (escapes allowed), signed
    # number, or one of the recognised keywords.
    default_value = None
    default_match = re.search(
        r"DEFAULT\s+("
        r"b'[01]*'"
        r"|'(?:[^'\\]|\\[\s\S]|'')*'"
        r"|\"(?:[^\"\\]|\\[\s\S])*\""
        r"|[-+]?\d+\.?\d*"
        r"|NULL|CURRENT_TIMESTAMP|CURRENT_DATE|CURRENT_TIME|TRUE|FALSE"
        r")",
        rest,
        re.IGNORECASE
    )
    if default_match:
        default_value = default_match.group(1)
    if default_value and pg_type == 'BOOLEAN':
        # PostgreSQL rejects an integer default on a boolean column, so the
        # 0/1 defaults of a former TINYINT(1) are spelled as booleans.
        boolean_defaults = {'0': 'FALSE', '1': 'TRUE', "'0'": 'FALSE', "'1'": 'TRUE'}
        default_value = boolean_defaults.get(default_value, default_value)

    is_primary = bool(re.search(r'\bPRIMARY\s+KEY\b', rest, re.IGNORECASE))
    is_unique = bool(re.search(r'\bUNIQUE\b', rest, re.IGNORECASE))

    # PRIMARY KEY is deliberately left out of the definition; the flag is
    # reported to the caller, which decides where the key is declared.
    has_primary_key = is_auto_increment or is_primary

    parts = [col_name, pg_type]
    if not_null:
        parts.append('NOT NULL')
    elif is_null:
        parts.append('NULL')
    if default_value:
        default_upper = default_value.upper()
        if default_upper in ('CURRENT_TIMESTAMP', 'CURRENT_DATE', 'CURRENT_TIME'):
            parts.append(f'DEFAULT {default_upper}')
        else:
            # Rewrite MySQL-escaped quotes inside the default value.
            default_value = default_value.replace("\\'", "''")
            parts.append(f'DEFAULT {default_value}')
    if is_unique:
        parts.append('UNIQUE')

    return {
        'name': col_name,
        'definition': ' '.join(parts),
        'comment': comment,
        'has_primary_key': has_primary_key
    }
def convert_insert_statement(mysql_sql):
    """Convert one MySQL ``INSERT`` statement to PostgreSQL syntax.

    Backticks are removed and the MySQL functions ``NOW()``, ``UUID()``,
    ``CURDATE()`` and ``CURTIME()`` are replaced, but only in the parts of
    the statement that lie outside single-quoted string literals, so row
    data containing a backtick or the text ``NOW()`` is preserved. The
    whole statement is then passed through ``escape_quotes`` and
    ``convert_hex_to_bytea``.

    Args:
        mysql_sql: A single ``INSERT`` statement without the trailing
            semicolon.

    Returns:
        The converted statement, still without a trailing semicolon.
    """
    function_rewrites = (
        (r'\bNOW\s*\(\)', 'CURRENT_TIMESTAMP'),
        (r'\bUUID\s*\(\)', 'gen_random_uuid()'),
        (r'\bCURDATE\s*\(\)', 'CURRENT_DATE'),
        (r'\bCURTIME\s*\(\)', 'CURRENT_TIME'),
    )

    def rewrite_code(segment):
        # Identifier quoting and function names only occur outside strings.
        segment = segment.replace('`', '')
        for pattern, replacement in function_rewrites:
            segment = re.sub(pattern, replacement, segment, flags=re.IGNORECASE)
        return segment

    # With one capturing group, re.split alternates between the text outside
    # string literals (even indexes) and the literals themselves (odd).
    parts = re.split(r"('(?:[^'\\]|\\[\s\S]|'')*')", mysql_sql)
    pg_sql = ''.join(
        part if index % 2 else rewrite_code(part)
        for index, part in enumerate(parts)
    )
    pg_sql = escape_quotes(pg_sql)
    return convert_hex_to_bytea(pg_sql)
def convert_mysql_to_postgresql(mysql_sql):
    """Convert a MySQL SQL dump to PostgreSQL SQL.

    The dump is split into statements and each one is handled by kind:

    * ``CREATE TABLE`` is rewritten by ``convert_create_table``;
    * ``INSERT INTO`` is rewritten by ``convert_insert_statement``;
    * ``UPDATE``, ``DELETE``, ``ALTER``, ``TRUNCATE`` and
      ``CREATE [UNIQUE] INDEX`` get backticks removed (plus quote/hex
      conversion where the original did so);
    * ``DROP TABLE``, ``CREATE DATABASE``, ``USE``, ``LOCK TABLES``,
      ``UNLOCK TABLES``, ``SET NAMES`` and ``SET FOREIGN_KEY_CHECKS`` are
      dropped;
    * anything else is kept, preceded by a ``-- TODO`` marker line.

    Foreign-key statements collected from all tables are appended at the
    end so that they run after every table has been created.

    Args:
        mysql_sql: The full text of the MySQL dump.

    Returns:
        The converted SQL as one string, statements joined by newlines.
    """
    result = []
    all_foreign_keys = []
    skipped_prefixes = ('DROP TABLE', 'CREATE DATABASE', 'USE ', 'LOCK TABLES', 'UNLOCK TABLES')

    for stmt in split_sql_statements(mysql_sql):
        stmt = stmt.strip()
        if not stmt:
            continue
        stmt_upper = stmt.upper()

        if stmt_upper.startswith('CREATE TABLE'):
            pg_stmt, fk_list = convert_create_table(stmt)
            if pg_stmt:
                result.append(pg_stmt)
                all_foreign_keys.extend(fk_list)
        elif stmt_upper.startswith(skipped_prefixes):
            # DROP TABLE is regenerated by convert_create_table; the others
            # are MySQL-only statements.
            continue
        elif re.match(r'SET\s+(?:FOREIGN_KEY_CHECKS|NAMES)\b', stmt, re.IGNORECASE):
            # MySQL session settings are dropped per statement rather than
            # by a regex over the raw dump, which could also hit row data.
            continue
        elif stmt_upper.startswith('INSERT INTO'):
            result.append(convert_insert_statement(stmt) + ";")
        elif stmt_upper.startswith(('CREATE INDEX', 'CREATE UNIQUE INDEX', 'TRUNCATE')):
            result.append(remove_backticks(stmt) + ";")
        elif stmt_upper.startswith('UPDATE'):
            pg_stmt = escape_quotes(remove_backticks(stmt))
            result.append(convert_hex_to_bytea(pg_stmt) + ";")
        elif stmt_upper.startswith(('DELETE', 'ALTER')):
            result.append(escape_quotes(remove_backticks(stmt)) + ";")
        else:
            # Unknown statement: keep it, flagged for manual review.
            result.append(f"-- TODO: 请检查以下SQL是否兼容PostgreSQL")
            result.append(remove_backticks(stmt) + ";")

    # Foreign keys go last, after all tables exist.
    if all_foreign_keys:
        result.append("")
        result.append("-- 外键约束")
        result.extend(all_foreign_keys)
    return "\n".join(result)
def split_sql_statements(sql):
    """Split SQL text into statements, honouring strings and comments.

    Statements are separated by ``;`` outside quoted strings. ``--`` line
    comments and ``/* ... */`` block comments outside strings are removed.
    Inside a string a backslash escapes the character that follows it. The
    newline that ends a line comment is kept, so the tokens before and
    after the comment are not joined together.

    Args:
        sql: The SQL text to split.

    Returns:
        A list of the non-empty statements, each stripped of surrounding
        whitespace and without its terminating semicolon.
    """
    statements = []
    pieces = []    # fragments of the statement currently being collected
    quote = None   # the quote character of the string being scanned
    i = 0
    length = len(sql)

    def flush():
        statement = ''.join(pieces).strip()
        if statement:
            statements.append(statement)
        del pieces[:]

    while i < length:
        char = sql[i]
        if quote is not None:
            if char == '\\' and i + 1 < length:
                # Copy the escape sequence as a unit.
                pieces.append(sql[i:i + 2])
                i += 2
                continue
            if char == quote:
                quote = None
            pieces.append(char)
        elif char in ("'", '"'):
            quote = char
            pieces.append(char)
        elif char == ';':
            flush()
        elif char == '-' and sql.startswith('--', i):
            # Line comment: jump to the newline and let it be copied.
            line_end = sql.find('\n', i)
            i = length if line_end == -1 else line_end
            continue
        elif char == '/' and sql.startswith('/*', i):
            # Block comment: jump past the closing "*/".
            comment_end = sql.find('*/', i + 2)
            i = length if comment_end == -1 else comment_end + 2
            continue
        else:
            pieces.append(char)
        i += 1

    # Whatever is left is the final statement (no trailing semicolon).
    flush()
    return statements
def main():
    """Command-line entry point.

    Reads the MySQL SQL file given by ``-i/--input`` (UTF-8), converts it
    with ``convert_mysql_to_postgresql`` and writes the result to the file
    given by ``-o/--output`` (UTF-8). Problems are reported with ``print``
    and the function simply returns.
    """
    cli = argparse.ArgumentParser(description='MySQL转PostgreSQL SQL转换器')
    option_specs = (
        ('-i', '--input', '输入的MySQL SQL文件路径'),
        ('-o', '--output', '输出的PostgreSQL SQL文件路径'),
    )
    for short_flag, long_flag, help_text in option_specs:
        cli.add_argument(short_flag, long_flag, required=True, help=help_text)
    options = cli.parse_args()
    source_path = options.input
    target_path = options.output

    # Bail out early when there is nothing to read.
    if not os.path.exists(source_path):
        print(f"错误:输入文件 '{source_path}' 不存在")
        return

    try:
        with open(source_path, 'r', encoding='utf-8') as source_file:
            mysql_sql = source_file.read()
    except Exception as read_error:
        print(f"读取文件时出错: {read_error}")
        return

    print("开始转换...")
    postgresql_sql = convert_mysql_to_postgresql(mysql_sql)

    try:
        with open(target_path, 'w', encoding='utf-8') as target_file:
            target_file.write(postgresql_sql)
        print(f"转换成功!结果已保存到 '{target_path}'")
    except Exception as write_error:
        print(f"写入文件时出错: {write_error}")
# Usage example:
#   python mysql_to_postgresql.py -i input.sql -o output_psql.sql
if __name__ == "__main__":
    # Run the converter only when the file is executed as a script.
    main()
把上述代码保存为 mysql_to_postgresql.py
执行命令:python mysql_to_postgresql.py -i <输入的 MySQL SQL 文件路径> -o <输出的 PostgreSQL SQL 文件路径>
如下:
python
python mysql_to_postgresql.py -i admincomm.sql -o admincomm_psql.sql