达梦导入大数据
Python脚本
python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
# Path of the source SQL file to convert; its encoding is detected at runtime.
INPUT_FILE = "HB_LOWAIR1209.sql"

# Path of the converted file; main() always writes it as GB18030.
OUTPUT_FILE = "HB_LOWAIR20251127_final_gb18030.sql"

# Statements written at the very top of OUTPUT_FILE, ahead of the copied SQL.
HEADER_LINES = [
    "WHENEVER SQLERROR CONTINUE;",
    "SET AUTOCOMMIT OFF;",
]
def detect_encoding(filepath, sample_size=None):
    """Return the first candidate encoding that decodes *filepath* cleanly.

    Candidates are tried in order: UTF-8, GB18030, GBK.  The file content
    itself is validated rather than only its first line, so a dump whose
    opening lines are pure ASCII but whose later rows contain GB18030 bytes
    is not misreported as UTF-8.

    The file is streamed in fixed-size binary chunks through an incremental
    decoder, so memory use does not grow with the file size.

    Args:
        filepath: Path of the file to probe.
        sample_size: Maximum number of bytes to validate.  ``None`` (the
            default) validates the whole file, which costs one extra read
            pass per candidate tried; pass a byte count to bound that cost
            on very large files, at the price of a weaker guarantee.

    Returns:
        The name of the first candidate that decodes without error, or
        ``'latin1'`` as a last resort when none of them does.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # GB18030 is designed as a superset of GBK, so the GBK entry is expected
    # to be reached rarely, if ever; it is kept to preserve the original
    # candidate list.
    for enc in ('utf-8', 'gb18030', 'gbk'):
        if _decodes_cleanly(filepath, enc, sample_size):
            return enc
    return 'latin1'  # last-resort fallback: every byte maps to a character


def _decodes_cleanly(filepath, encoding, limit):
    """Return True if the first *limit* bytes (all bytes if None) decode as *encoding*."""
    import codecs  # local import: keeps this block usable without touching file imports

    chunk_size = 1 << 20  # 1 MiB per read keeps memory flat on huge dumps
    decoder = codecs.getincrementaldecoder(encoding)()  # strict error mode
    remaining = limit
    try:
        with open(filepath, 'rb') as f:
            while remaining is None or remaining > 0:
                want = chunk_size if remaining is None else min(chunk_size, remaining)
                chunk = f.read(want)
                if not chunk:
                    # End of file: flush the decoder so a multi-byte sequence
                    # truncated at the very end is reported as an error.
                    decoder.decode(b'', final=True)
                    break
                # The incremental decoder buffers a multi-byte character that
                # is split across two chunks instead of failing on it.
                decoder.decode(chunk)
                if remaining is not None:
                    remaining -= len(chunk)
    except UnicodeError:
        return False
    return True
def main():
    """Convert INPUT_FILE into a GB18030 copy with HEADER_LINES prepended.

    Steps:
      1. Exit with status 1 if INPUT_FILE does not exist.
      2. Detect the source encoding with detect_encoding().
      3. Stream the source line by line into a temporary ``.part`` file
         (header lines, one blank separator line, then the source content)
         and rename it to OUTPUT_FILE only after the copy has finished.

    A leading UTF-8 byte order mark is dropped so it does not end up in the
    middle of the output, right after the header lines.  Lines in which the
    decoder had to substitute U+FFFD are counted and reported on stderr.

    Any error during the copy is printed to stderr, the partial temporary
    file is removed, and the process exits with status 1.
    """
    if not os.path.isfile(INPUT_FILE):
        print(f"错误:输入文件 '{INPUT_FILE}' 不存在!", file=sys.stderr)
        sys.exit(1)

    # Detect the encoding of the source file.
    src_encoding = detect_encoding(INPUT_FILE)
    print(f"✓ 检测到源文件编码: {src_encoding}")

    # Write to a sibling temporary file and rename on success, so a failed
    # run never leaves a truncated SQL file under the final name.
    tmp_path = OUTPUT_FILE + ".part"
    replaced_lines = 0
    try:
        with open(INPUT_FILE, 'r', encoding=src_encoding, errors='replace') as fin, \
                open(tmp_path, 'w', encoding='gb18030', newline='') as fout:
            # Header first, followed by one blank separator line.
            for header in HEADER_LINES:
                fout.write(header + "\n")
            fout.write("\n")

            # Copy line by line so memory use stays flat on large files.
            for index, line in enumerate(fin):
                if index == 0 and line.startswith("\ufeff"):
                    # The plain 'utf-8' codec keeps the BOM as U+FEFF; drop
                    # it so it is not glued onto the first SQL statement.
                    line = line[1:]
                if "\ufffd" in line:
                    replaced_lines += 1
                fout.write(line)
        os.replace(tmp_path, OUTPUT_FILE)
    except Exception as e:  # top-level boundary: report and exit non-zero
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # nothing to clean up (never created, or already renamed)
        print(f"❌ 处理过程中出错: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        if replaced_lines:
            # errors='replace' hides decode failures; surface them here.
            print(
                f"⚠ 警告: {replaced_lines} 行含有替换字符 U+FFFD"
                f"(可能存在无法按 {src_encoding} 解码的字节),请核对源文件编码。",
                file=sys.stderr,
            )
        print(f"✓ 成功生成 GB18030 编码文件: {OUTPUT_FILE}")
        print(" 请手动执行后续 docker cp 和 disql 命令。")
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
执行脚本:先运行上面的 Python 脚本生成 GB18030 文件,再用以下命令将其复制进容器并通过 disql 后台导入
# Copy the converted GB18030 file into the container's /tmp directory.
docker cp HB_LOWAIR20251127_final_gb18030.sql dm8_container:/tmp/
# Start the import inside the container in detached mode (-d), so this
# command returns immediately. Inside the container the script:
#   - sets LANG / LC_ALL to zh_CN.GB18030, matching the file's encoding;
#   - prepends /opt/dmdbms/bin to LD_LIBRARY_PATH (the \$ is escaped so the
#     variable is expanded inside the container, not by the host shell);
#   - pipes the SQL file into disql and redirects stdout and stderr to
#     /tmp/import.log inside the container.
# NOTE(review): SYSDBA/SYSDBA001 looks like a user/password pair passed on
# the command line - confirm, and avoid committing real credentials.
docker exec -d dm8_container sh -c "
export LANG=zh_CN.GB18030 &&
export LC_ALL=zh_CN.GB18030 &&
export LD_LIBRARY_PATH=/opt/dmdbms/bin:\$LD_LIBRARY_PATH &&
cd /tmp &&
cat HB_LOWAIR20251127_final_gb18030.sql | /opt/dmdbms/bin/disql -L SYSDBA/SYSDBA001 > import.log 2>&1
"