在Python中,序列化和反序列化对象有多种方法。以下是主要的方法和示例:
1. 使用 pickle 模块(最常用)
pickle 是Python的标准序列化模块,可以处理大多数Python对象。
基本使用
python
import pickle
# 定义一个示例对象
class Person:
    """Minimal example type used to demonstrate pickling.

    Attributes:
        name: The person's name.
        age: The person's age.
    """

    def __init__(self, name, age):
        self.name = name
        self.age = age

    def __repr__(self):
        # Values are interpolated with str(), so the name appears unquoted.
        return f"Person(name={self.name}, age={self.age})"
# Create the object that will be serialized.
person = Person("Alice", 30)

# Serialize: object -> bytes.
serialized = pickle.dumps(person)
print(f"序列化结果: {serialized[:50]}...")

# Deserialize: bytes -> object. Only unpickle data that you trust.
deserialized = pickle.loads(serialized)
print(f"反序列化结果: {deserialized}")

# Serialize to a file; pickle output is binary, hence mode 'wb'.
with open('person.pkl', 'wb') as f:
    pickle.dump(person, f)

# Deserialize from the file, again opened in binary mode.
with open('person.pkl', 'rb') as f:
    loaded_person = pickle.load(f)
print(f"从文件加载: {loaded_person}")
处理复杂对象
python
import pickle
class Company:
    """Example container whose ``employees`` attribute holds other objects.

    Attributes:
        name: The company name.
        employees: The collection of employee objects passed by the caller;
            it is stored as given, without copying.
    """

    def __init__(self, name, employees):
        self.name = name
        self.employees = employees
# Build an object graph: a Company that references several Person objects.
employees = [
    Person(name, age)
    for name, age in (("Bob", 25), ("Charlie", 35))
]
company = Company(name="TechCorp", employees=employees)

# Round-trip the whole graph through pickle.
data = pickle.dumps(company)
restored = pickle.loads(data)
print(restored.name)  # TechCorp
print(restored.employees)  # [Person(name=Bob, age=25), ...]
2. 使用 json 模块
JSON更适合跨语言的数据交换,但只能处理基本数据类型。
python
import json
# A payload made only of JSON-native types (str, int, list, dict).
data = dict(
    name="Alice",
    age=30,
    hobbies=["reading", "hiking"],
)

# Encode to a JSON string, pretty-printed with two-space indentation.
json_str = json.dumps(data, indent=2)
print(json_str)

# Decode the JSON string back into Python objects.
parsed = json.loads(json_str)
print(parsed["name"])  # Alice
# 处理自定义对象
class Person:
    """Example type that converts itself to and from a plain dict.

    JSON cannot encode arbitrary objects, so the class exposes an explicit
    dict representation that ``json.dumps`` / ``json.loads`` can handle.
    """

    def __init__(self, name, age):
        self.name = name
        self.age = age

    def to_dict(self):
        """Return the instance as a dict with ``"name"`` and ``"age"`` keys."""
        return {"name": self.name, "age": self.age}

    @classmethod
    def from_dict(cls, data):
        """Build an instance from a dict produced by ``to_dict``.

        Raises:
            KeyError: If ``"name"`` or ``"age"`` is missing from ``data``.
        """
        return cls(data["name"], data["age"])
person = Person(name="Bob", age=25)

# Serialize: convert the object to a dict first, then encode the dict.
person_dict = person.to_dict()
json_str = json.dumps(person_dict)
print(json_str)  # {"name": "Bob", "age": 25}

# Deserialize: decode to a dict first, then rebuild the object from it.
loaded_dict = json.loads(json_str)
loaded_person = Person.from_dict(loaded_dict)
print(loaded_person.name)  # Bob
3. 使用 json 模块的自定义编码器/解码器
python
import json
from datetime import datetime
class PersonEncoder(json.JSONEncoder):
    """JSON encoder that tags ``Person`` and ``datetime`` values.

    Each supported object is emitted as a dict carrying a marker key
    (``"__person__"`` or ``"__datetime__"``) so that a matching decoder can
    recognise it and rebuild the original object.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        The ``json`` module calls this only for objects it cannot encode
        natively. Unsupported types are delegated to the base class, which
        raises ``TypeError``.
        """
        if isinstance(obj, datetime):
            return {"__datetime__": obj.isoformat()}
        if isinstance(obj, Person):
            return {
                "__person__": True,
                "name": obj.name,
                "age": obj.age,
            }
        return super().default(obj)
class PersonDecoder(json.JSONDecoder):
    """JSON decoder that rebuilds objects from marker-tagged dicts.

    Dicts containing ``"__person__"`` become ``Person`` instances and dicts
    containing ``"__datetime__"`` become ``datetime`` instances; every other
    dict is returned unchanged.
    """

    def __init__(self, *args, **kwargs):
        # Positional arguments are forwarded before the keyword so the call
        # reads in evaluation order; the hook itself is always this class's.
        super().__init__(*args, object_hook=self.object_hook, **kwargs)

    def object_hook(self, dct):
        """Convert one decoded JSON object (a dict) into its Python value."""
        if "__person__" in dct:
            return Person(dct["name"], dct["age"])
        if "__datetime__" in dct:
            return datetime.fromisoformat(dct["__datetime__"])
        return dct
# Encode: pass the encoder class through the ``cls`` argument.
person = Person(name="Alice", age=30)
json_str = json.dumps(person, cls=PersonEncoder)
print(json_str)

# Decode: pass the decoder class the same way to get a Person back.
restored = json.loads(json_str, cls=PersonDecoder)
print(restored.name)  # Alice
4. 使用 dataclasses 和 asdict(Python 3.7+)
python
from dataclasses import dataclass, asdict
from typing import List
import json
@dataclass
class Address:
    """Postal address; used as a nested field of ``Employee``."""

    street: str
    city: str


@dataclass
class Employee:
    """Employee record that contains a nested ``Address`` dataclass."""

    name: str
    age: int
    address: Address
    skills: List[str]


# Build a sample object graph.
address = Address("123 Main St", "New York")
employee = Employee("Alice", 30, address, ["Python", "Java"])

# Serialize: asdict() converts nested dataclasses to plain dicts as well.
employee_dict = asdict(employee)
json_str = json.dumps(employee_dict, indent=2)
print(json_str)


# Deserialize.
def from_dict(cls, data):
    """Build ``cls`` from ``data``, rebuilding nested dataclass fields.

    ``cls(**data)`` alone would leave a nested dataclass (for example
    ``Employee.address``) as a plain dict, because JSON carries no type
    information. Fields whose declared type is itself a dataclass are
    therefore converted recursively.

    Args:
        cls: The class to instantiate. A non-dataclass is called as
            ``cls(**data)`` without further processing.
        data: Mapping of constructor keyword arguments.

    Returns:
        A new ``cls`` instance.

    Note:
        Only fields annotated directly with a dataclass type are rebuilt.
        Containers such as ``List[Address]`` and string annotations are
        passed through unchanged.
    """
    from dataclasses import fields, is_dataclass

    if not is_dataclass(cls):
        return cls(**data)

    nested_types = {
        f.name: f.type for f in fields(cls) if is_dataclass(f.type)
    }
    kwargs = {
        key: (
            from_dict(nested_types[key], value)
            if key in nested_types and isinstance(value, dict)
            else value
        )
        for key, value in data.items()
    }
    return cls(**kwargs)


loaded_dict = json.loads(json_str)
loaded_employee = from_dict(Employee, loaded_dict)
print(loaded_employee.name)  # Alice
5. 使用 `__getstate__` 和 `__setstate__` 自定义pickle行为
python
import pickle
class SecurePerson:
    """Example type that keeps its password out of the pickled state.

    Attributes:
        name: The person's name.
        age: The person's age.
        _password: Sensitive value; never written to the pickle stream and
            set to ``None`` on any unpickled instance.
    """

    def __init__(self, name, age, password):
        self.name = name
        self.age = age
        self._password = password  # sensitive data

    def __getstate__(self):
        """Return the instance state to pickle, without the password."""
        # Work on a copy so the live object keeps its password.
        state = self.__dict__.copy()
        # pop() with a default tolerates an instance whose password
        # attribute has already been removed.
        state.pop('_password', None)
        return state

    def __setstate__(self, state):
        """Restore the pickled attributes; the password is not recovered."""
        self.__dict__.update(state)
        self._password = None  # the password is intentionally not restored
# Round-trip through pickle; the password must not survive the trip.
person = SecurePerson(name="Alice", age=30, password="secret123")
serialized = pickle.dumps(person)
restored = pickle.loads(serialized)
print(restored.name)  # Alice
print(restored._password)  # None (the password was deliberately dropped)
6. 使用第三方库
dill(扩展的pickle)
python
import dill # pip install dill
# dill can serialize more kinds of Python objects than pickle, including
# lambda functions. The lambda below is kept as a lambda on purpose: it is
# the object being demonstrated.
func = lambda x: x * 2
serialized = dill.dumps(func)
restored = dill.loads(serialized)
print(restored(5)) # 10
marshmallow(更结构化的序列化)
python
from marshmallow import Schema, fields # pip install marshmallow
class PersonSchema(Schema):
    """marshmallow schema describing a person record.

    Declares ``name`` as a string field and ``age`` as an integer field.
    """

    name = fields.Str()
    age = fields.Int()
person = dict(name="Alice", age=30)
schema = PersonSchema()

# Serialize: dump the record through the schema.
result = schema.dump(person)
print(result)  # {'name': 'Alice', 'age': 30}

# Deserialize: load validates and converts the incoming values.
data = dict(name="Bob", age="25")  # note: age arrives as a string
loaded = schema.load(data)
print(loaded)  # {'name': 'Bob', 'age': 25}
选择建议
- pickle:Python内部使用,需要序列化复杂对象或整个对象图
- json:跨语言、网络传输、配置文件
- dataclasses + json:结构化数据,字段与类型注解清晰(注意:dataclass 在运行时不会校验类型,嵌套对象需要手动还原)
- marshmallow:API开发,需要数据验证
- dill:需要序列化lambda、闭包等特殊对象
注意事项
- pickle不能序列化文件句柄、数据库连接等资源
- pickle有安全风险:反序列化时可能执行任意代码,不要反序列化来自不可信来源的数据
- JSON只能序列化基本数据类型,需要自定义编码器处理复杂对象
- 考虑版本兼容性,特别是当数据结构变化时