MySQL通常不建议存储字节类数据比如pickle.dump后的数据,因为会破坏数据范式。
然而,实际场景中可能需要用MySQL存储复杂类型数据,比如模型训练过程中产生的复杂记录信息。
MySQL可以存储字节类数据,但需要提前将这些数据序列化。
这里示例几种存储字节类数据的方法。
1 直接存储为BLOB
直接从文件中读取或pickle.dump后的字节类数据,在mysql中可以存储为Blob类型。
示例代码如下所示。
import pymysql
import pickle
import json
# 创建连接
def create_connection(host='localhost', user='root', password='mysqlpasswd',
                      database='mydb'):
    """Create and return a new pymysql connection.

    The connection settings were hard-coded demo values; they are now
    keyword parameters with the same defaults, so existing callers
    (``create_connection()``) behave identically while the helpers can be
    pointed at any server.

    Args:
        host: MySQL server host name.
        user: Login user.
        password: Login password.
        database: Schema to use.

    Returns:
        An open connection using ``utf8mb4`` and ``DictCursor``, so fetched
        rows are dicts keyed by column name (the read helpers below rely on
        this).
    """
    connection = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database,
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )
    return connection
# 创建表(包含BLOB字段)
def create_table():
    """Create the method-1 demo table ``pickle_data`` if it does not exist.

    The ``data`` column is a BLOB, so raw ``pickle.dumps`` bytes can be
    stored directly.
    """
    conn = create_connection()
    try:
        with conn.cursor() as cur:
            ddl = """
            CREATE TABLE IF NOT EXISTS pickle_data (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(100),
                data BLOB,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
            """
            cur.execute(ddl)
        conn.commit()
    finally:
        conn.close()
# 上传pickle数据
def upload_pickle_data(name, data_obj):
    """Serialize ``data_obj`` with pickle and insert it as a BLOB row.

    Prints a success or failure message; never raises to the caller.
    """
    # pickle.dumps yields bytes, which map straight onto the BLOB column.
    blob = pickle.dumps(data_obj)
    conn = create_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(
                "INSERT INTO pickle_data (name, data) VALUES (%s, %s)",
                (name, blob),
            )
        conn.commit()
        print(f"数据 '{name}' 上传成功")
    except Exception as e:
        print(f"上传失败: {e}")
    finally:
        conn.close()
# 读取pickle数据
def read_pickle_data(name):
    """Fetch the BLOB stored under ``name`` and unpickle it.

    Returns the deserialized object, or ``None`` (after printing a notice)
    when no matching row exists.
    """
    conn = create_connection()
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT data FROM pickle_data WHERE name = %s", (name,))
            row = cur.fetchone()
        if row:
            # NOTE(security): pickle.loads executes arbitrary code during
            # deserialization — only unpickle rows written by trusted code.
            return pickle.loads(row['data'])
        print("未找到数据")
        return None
    finally:
        conn.close()
# 使用示例
if __name__ == "__main__":
    # Make sure the demo table exists before writing to it.
    create_table()

    # Nested structure mixing dicts, lists and non-ASCII text; pickle
    # round-trips all of it with Python types intact.
    sample_data = {
        'user_id': 123,
        'user_name': '张三',
        'preferences': {'theme': 'dark', 'language': 'zh'},
        'data_list': [1, 2, 3, 4, 5],
        'nested_data': {
            'level1': {
                'level2': ['a', 'b', 'c'],
            },
        },
    }

    # Round-trip: store, then read the full object back.
    upload_pickle_data('user_data', sample_data)
    retrieved_data = read_pickle_data('user_data')
    print(f"读取的数据: {retrieved_data}")

    # Second read demonstrates normal dict access on the result.
    print("\n读取数据:")
    retrieved = read_pickle_data('user_data')  # read back via method 1
    if retrieved:
        print(f"用户偏好: {retrieved.get('preferences', {})}")
输出如下所示,数据在pickle.dump后上传成功,并且被成功读取。
数据 'user_data' 上传成功
读取的数据: {'user_id': 123, 'user_name': '张三', 'preferences': {'theme': 'dark', 'language': 'zh'}, 'data_list': [1, 2, 3, 4, 5], 'nested_data': {'level1': {'level2': ['a', 'b', 'c']}}}
读取数据:
用户偏好: {'theme': 'dark', 'language': 'zh'}
2 base64编码后存储
除直接存储为BLOB外,还可以先用base64将字节数据编码为纯文本字符串,再以TEXT类型存储。
示例代码如下所示。
import pymysql
import pickle
import base64
def upload_pickle_base64(name, data_obj):
    """Pickle ``data_obj``, Base64-encode the bytes, and store them as TEXT.

    Creates the backing table ``pickle_text_data`` on first use, then inserts
    the encoded payload. Prints a success or failure message; never raises.
    """
    raw = pickle.dumps(data_obj)
    # Base64 turns arbitrary bytes into ASCII text that fits a TEXT column.
    encoded = base64.b64encode(raw).decode('utf-8')
    conn = create_connection()
    try:
        with conn.cursor() as cur:
            # Lazily create the TEXT-based table.
            cur.execute(
                """
                CREATE TABLE IF NOT EXISTS pickle_text_data (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    name VARCHAR(100),
                    data TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
                """
            )
            cur.execute(
                "INSERT INTO pickle_text_data (name, data) VALUES (%s, %s)",
                (name, encoded),
            )
        conn.commit()
        print(f"数据 '{name}' 上传成功")
    except Exception as e:
        print(f"上传失败: {e}")
    finally:
        conn.close()
def read_pickle_base64(name):
    """Read a Base64-encoded pickle payload from the TEXT column.

    Made consistent with ``read_pickle_data``: on a missing row it now prints
    the same notice and returns ``None`` explicitly, instead of silently
    falling through.

    Returns:
        The deserialized object, or ``None`` when no row matches ``name``.
    """
    connection = create_connection()
    try:
        with connection.cursor() as cursor:
            sql = "SELECT data FROM pickle_text_data WHERE name = %s"
            cursor.execute(sql, (name,))
            result = cursor.fetchone()
            if result:
                # Decode Base64 back to the original bytes, then unpickle.
                # NOTE(security): pickle.loads executes arbitrary code; only
                # read rows written by trusted code.
                decoded_data = base64.b64decode(result['data'])
                data_obj = pickle.loads(decoded_data)
                return data_obj
            print("未找到数据")
            return None
    finally:
        connection.close()
# 使用示例
if __name__ == "__main__":
    # NOTE: the original demo called create_table() here, which builds the
    # unrelated method-1 ``pickle_data`` table — a copy-paste leftover.
    # upload_pickle_base64() creates its own ``pickle_text_data`` table, so
    # the stray call has been removed.

    # Same nested sample structure as the method-1 demo.
    sample_data = {
        'user_id': 123,
        'user_name': '张三',
        'preferences': {'theme': 'dark', 'language': 'zh'},
        'data_list': [1, 2, 3, 4, 5],
        'nested_data': {
            'level1': {
                'level2': ['a', 'b', 'c']
            }
        }
    }

    # Store and read back via the Base64/TEXT scheme.
    upload_pickle_base64('user_data', sample_data)
    retrieved_data = read_pickle_base64('user_data')
    print(f"读取的数据: {retrieved_data}")
输出如下所示。
数据 'user_data' 上传成功
读取的数据: {'user_id': 123, 'user_name': '张三', 'preferences': {'theme': 'dark', 'language': 'zh'}, 'data_list': [1, 2, 3, 4, 5], 'nested_data': {'level1': {'level2': ['a', 'b', 'c']}}}
3 综合json和pickle存储
3.1 综合方案
字节数据依然通过pickle序列化后以BLOB存储,但同时以JSON格式保存meta(元数据)信息,为后续查询和读取pickle数据提供方便。
准备元数据
metadata = {
'data_type': str(type(data_obj)),
'pickle_version': pickle.format_version,
'original_size': len(str(data_obj)),
'serialized_size': len(pickled_data),
'timestamp': datetime.now().isoformat()
}
示例代码如下所示。
import pymysql
import pickle
import json
from datetime import datetime
from decimal import Decimal
def create_table_with_metadata():
    """Create ``serialized_data``: a BLOB payload plus searchable JSON metadata.

    The extra columns (type, size, metadata, index on name) let SQL queries
    inspect stored objects without unpickling them.
    """
    conn = create_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                CREATE TABLE IF NOT EXISTS serialized_data (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    data_name VARCHAR(100) NOT NULL,
                    data_type VARCHAR(50),
                    data_size INT,
                    pickle_data BLOB,
                    metadata JSON,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    INDEX idx_name (data_name)
                )
                """
            )
        conn.commit()
    finally:
        conn.close()
def upload_with_metadata(data_name, data_obj):
    """Store a pickled object together with JSON metadata describing it.

    Prints a success or failure message; never raises to the caller.
    """
    payload = pickle.dumps(data_obj)

    # Bookkeeping that makes the opaque BLOB discoverable via plain SQL.
    meta = {
        'data_type': str(type(data_obj)),
        'pickle_version': pickle.format_version,
        'original_size': len(str(data_obj)),
        'serialized_size': len(payload),
        'timestamp': datetime.now().isoformat(),
    }

    conn = create_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                INSERT INTO serialized_data
                (data_name, data_type, data_size, pickle_data, metadata)
                VALUES (%s, %s, %s, %s, %s)
                """,
                (
                    data_name,
                    str(type(data_obj)),
                    len(payload),
                    payload,
                    json.dumps(meta),
                ),
            )
        conn.commit()
        print(f"数据 '{data_name}' 上传成功")
    except Exception as e:
        print(f"上传失败: {e}")
    finally:
        conn.close()
def list_all_data():
    """Return summary columns for every stored row, newest first.

    Returns:
        A list of dict rows (DictCursor) with id, name, type, size,
        creation time and the JSON metadata — no BLOB, so listing stays cheap.
    """
    conn = create_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT id, data_name, data_type, data_size,
                       created_at, metadata
                FROM serialized_data
                ORDER BY created_at DESC
                """
            )
            return cur.fetchall()
    finally:
        conn.close()
3.2 运行示例
这里以神经网络训练数据存储为例,示例综合json和pickle的数据存储和读取过程。
def main():
    """End-to-end demo: create the table, store a training record, list rows."""
    # 1. Table with BLOB payload plus a JSON metadata column.
    create_table_with_metadata()

    # 2. A nested training record; the datetime value survives pickling,
    #    which plain JSON could not store directly.
    complex_data = {
        'model_data': {
            'weights': [0.1, 0.2, 0.3, 0.4],
            'biases': [0.01, 0.02],
            'config': {
                'layers': [128, 64, 32],
                'activation': 'relu',
            },
        },
        'training_history': {
            'loss': [0.5, 0.3, 0.2, 0.1],
            'accuracy': [0.7, 0.8, 0.85, 0.9],
        },
        'timestamp': datetime.now(),
    }

    # 3. Persist the record together with its metadata.
    upload_with_metadata('neural_network_model', complex_data)

    # 4. Show what is stored, using only the metadata columns.
    print("\n存储的所有数据:")
    for row in list_all_data():
        print(f"ID: {row['id']}, 名称: {row['data_name']}, "
              f"大小: {row['data_size']} bytes")


if __name__ == "__main__":
    main()
输出如下所示
数据 'neural_network_model' 上传成功
存储的所有数据:
ID: 1, 名称: neural_network_model, 大小: 340 bytes