用类做配置,这是个很好的开始!下面分享更多 Python 开发技巧,从基础到进阶。
一、配置管理的多种方式
1.1 使用类配置(推荐用于中型项目)
python
# config.py - 类配置
from pathlib import Path
from typing import Dict, Any
class Config:
    """Static project settings exposed as plain class attributes.

    Nothing is instantiated; callers read values directly, e.g.
    ``Config.BATCH_SIZE``.
    """

    # Filesystem layout: the directory two levels above this file is the base.
    BASE_DIR = Path(__file__).resolve().parent.parent
    DATA_DIR = BASE_DIR.joinpath('data')
    OUTPUT_DIR = BASE_DIR.joinpath('output')

    # Model hyper-parameters.
    MODEL_NAME = 'resnet50'
    BATCH_SIZE = 32
    EPOCHS = 100
    LEARNING_RATE = 1e-3

    # Dataset parameters.
    IMAGE_SIZE = (224, 224)
    NUM_CLASSES = 10

    # Runtime switches (may be reassigned by the application).
    USE_GPU = True
    DEBUG = False
# 使用方式
from config import Config
batch = Config.BATCH_SIZE # 32
1.2 环境变量配置(推荐用于生产)
python
# config.py - 环境变量配置
import os
from pathlib import Path
from dotenv import load_dotenv # 需要安装: pip install python-dotenv
# 加载 .env 文件
load_dotenv()
class EnvConfig:
    """Settings read from environment variables when the class is defined.

    Every value falls back to the literal default given here when the
    corresponding variable is not set.
    """

    # Values taken from the environment, each with a fallback.
    DATABASE_URL = os.environ.get('DATABASE_URL', 'postgresql://localhost/mydb')
    SECRET_KEY = os.environ.get('SECRET_KEY', 'dev-secret-key')
    # Only the (case-insensitive) text "true" enables debug mode.
    DEBUG = os.environ.get('DEBUG', 'False').lower() == 'true'
    PORT = int(os.environ.get('PORT', '8000'))

    # Paths are anchored at the directory that contains this file.
    BASE_DIR = Path(__file__).resolve().parent
    UPLOAD_DIR = BASE_DIR / os.environ.get('UPLOAD_DIR', 'uploads')
对应的 .env 文件:
bash
# .env(不提交到 Git)
DATABASE_URL=postgresql://user:pass@localhost/prod_db
SECRET_KEY=your-secret-key-here
DEBUG=False
PORT=8080
1.3 多环境配置(开发/测试/生产)
python
# config.py - 多环境配置
from pathlib import Path
import os
class BaseConfig:
    """Settings shared by every environment-specific configuration."""

    # The directory two levels above this file is the project base.
    BASE_DIR = Path(__file__).resolve().parent.parent
    DATA_DIR = BASE_DIR.joinpath('data')

    # Defaults that subclasses may override.
    SECRET_KEY = 'default-secret'
    BATCH_SIZE = 32
class DevelopmentConfig(BaseConfig):
    """Configuration used while developing locally."""

    DEBUG = True
    SECRET_KEY = 'dev-secret'
    DATABASE_URL = 'sqlite:///dev.db'
    # A small batch keeps debugging runs quick.
    BATCH_SIZE = 16
class ProductionConfig(BaseConfig):
    """Configuration used in production.

    ``DATABASE_URL`` and ``SECRET_KEY`` come from the environment and are
    ``None`` when the corresponding variable is not set.
    """

    DEBUG = False
    DATABASE_URL = os.environ.get('DATABASE_URL')
    # The secret is expected to be supplied through the environment.
    SECRET_KEY = os.environ.get('SECRET_KEY')

    # Throughput-oriented settings.
    BATCH_SIZE = 128
    WORKERS = 4
class TestingConfig(BaseConfig):
    """Configuration used by the test suite."""

    TESTING = True
    BATCH_SIZE = 8
    # In-memory SQLite database.
    DATABASE_URL = 'sqlite:///:memory:'
# 配置工厂
def get_config():
    """Return the configuration class selected by the ``ENV`` variable.

    ``ENV`` defaults to ``'development'``; any value that is not one of
    ``'development'``, ``'production'`` or ``'testing'`` also yields
    ``DevelopmentConfig``.
    """
    registry = {
        'development': DevelopmentConfig,
        'production': ProductionConfig,
        'testing': TestingConfig,
    }
    selected = os.getenv('ENV', 'development')
    if selected in registry:
        return registry[selected]
    return DevelopmentConfig
# 使用
config = get_config()
print(config.DATABASE_URL)
1.4 使用 dataclass 配置(类型安全)
python
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelConfig:
    """Typed model hyper-parameters, each with a default value."""

    model_name: str = 'resnet50'
    batch_size: int = 32
    learning_rate: float = 1e-3
    dropout_rate: float = 0.5
@dataclass
class DataConfig:
    """Dataset location and split ratios, checked right after construction.

    Raises:
        ValueError: when the three ratios differ from 1.0 by more than 1e-6.
    """

    data_dir: Path = Path(__file__).parent / 'data'
    train_ratio: float = 0.8
    val_ratio: float = 0.1
    test_ratio: float = 0.1
    random_seed: int = 42

    def __post_init__(self):
        """Validate the split ratios, then make sure ``data_dir`` exists."""
        ratio_sum = self.train_ratio + self.val_ratio + self.test_ratio
        deviation = abs(ratio_sum - 1.0)
        if deviation > 1e-6:
            raise ValueError(f"比例和必须为1,当前为{ratio_sum}")
        # Only reached for valid ratios: create the directory (and parents).
        self.data_dir.mkdir(parents=True, exist_ok=True)
@dataclass
class AppConfig:
    """Top-level configuration that groups the model and data settings.

    A section left as ``None`` is replaced by a default-constructed
    instance after initialisation.
    """

    model: ModelConfig = None
    data: DataConfig = None

    def __post_init__(self):
        """Fill in default sections for any that were not supplied."""
        self.model = ModelConfig() if self.model is None else self.model
        self.data = DataConfig() if self.data is None else self.data
# 使用
config = AppConfig()
print(config.model.batch_size) # 32
print(config.data.data_dir) # /path/to/data
二、路径和文件处理技巧
2.1 智能文件查找
python
from pathlib import Path
from typing import List, Optional
class FileFinder:
    """Locate files and project roots by walking up the directory tree."""

    @staticmethod
    def find_up(filename: str, start_path: Optional[Path] = None) -> Optional[Path]:
        """Search ``start_path`` and each of its ancestors for ``filename``.

        Args:
            filename: Name (or relative path) to look for in each directory.
            start_path: Directory to start from; defaults to the current
                working directory.

        Returns:
            The path of the first match, nearest directory first, or ``None``
            when no directory up to and including the filesystem root
            contains it.
        """
        origin = (Path.cwd() if start_path is None else start_path).resolve()
        # ``parents`` ends with the filesystem root, so the root directory is
        # examined as well (a ``while current != current.parent`` loop stops
        # one level too early and never looks there).
        for directory in (origin, *origin.parents):
            candidate = directory / filename
            if candidate.exists():
                return candidate
        return None

    @staticmethod
    def find_project_root(markers: Optional[List[str]] = None) -> Path:
        """Return the nearest ancestor of the working directory holding a marker.

        Args:
            markers: File or directory names that identify a project root.
                Defaults to ``.git``, ``.project_root``, ``setup.py`` and
                ``pyproject.toml``.

        Returns:
            The first directory (starting at the current working directory
            and moving upward, root included) that contains any marker, or
            the current working directory when none does.
        """
        if markers is None:
            markers = ['.git', '.project_root', 'setup.py', 'pyproject.toml']
        origin = Path.cwd().resolve()
        for directory in (origin, *origin.parents):
            if any((directory / marker).exists() for marker in markers):
                return directory
        return Path.cwd()
# 使用示例
file_finder = FileFinder()
# 向上查找配置文件
config_file = file_finder.find_up('config.yaml')
print(config_file) # /home/user/project/config.yaml
# 查找项目根目录
root = file_finder.find_project_root()
print(root) # /home/user/project
2.2 临时文件和目录
python
import os
import shutil
import tempfile
from contextlib import contextmanager
from pathlib import Path
class TempContext:
    """Context managers that create a temporary file or directory and remove it on exit.

    Requires ``os``, ``shutil``, ``tempfile``, ``contextlib.contextmanager``
    and ``pathlib.Path`` to be imported at module level.
    """

    @staticmethod
    @contextmanager
    def temp_file(suffix='.txt', content=None):
        """Yield the ``Path`` of a new temporary file; delete it afterwards.

        Args:
            suffix: File name suffix passed to ``tempfile.mkstemp``.
            content: Optional text written to the file before it is yielded.

        Yields:
            Path of the temporary file. The descriptor returned by
            ``mkstemp`` is already closed when the path is yielded.
        """
        fd, path = tempfile.mkstemp(suffix=suffix)
        try:
            if content is not None:
                # fdopen takes ownership of the descriptor and closes it.
                with os.fdopen(fd, 'w') as f:
                    f.write(content)
            else:
                os.close(fd)
            yield Path(path)
        finally:
            Path(path).unlink(missing_ok=True)

    @staticmethod
    @contextmanager
    def temp_dir():
        """Yield the ``Path`` of a new temporary directory; remove it afterwards.

        Removal is best-effort: errors while deleting the tree are ignored.
        """
        temp_dir = Path(tempfile.mkdtemp())
        try:
            yield temp_dir
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)
# 使用
with TempContext.temp_file(content='Hello') as f:
print(f.read_text()) # Hello
# 文件自动删除
with TempContext.temp_dir() as d:
(d / 'test.txt').write_text('data')
print(d.exists()) # True
# 目录自动清理
三、数据处理技巧
3.1 惰性求值和生成器
python
from typing import Iterator, List
import csv
def read_large_csv(file_path: Path, chunk_size: int = 1000) -> Iterator[List[dict]]:
    """Lazily read a CSV file and yield its rows in lists of ``chunk_size``.

    Args:
        file_path: CSV file whose first row holds the column names.
        chunk_size: Maximum number of rows per yielded list.

    Yields:
        Lists of row dictionaries (as produced by ``csv.DictReader``). The
        final list may be shorter; a file without data rows yields nothing.
    """
    # The csv module requires newline='' so that line breaks embedded in
    # quoted fields reach the parser untranslated.
    with file_path.open('r', newline='') as f:
        reader = csv.DictReader(f)
        chunk = []
        for row in reader:
            chunk.append(row)
            if len(chunk) >= chunk_size:
                yield chunk
                chunk = []
        # Flush the trailing partial chunk, if any.
        if chunk:
            yield chunk
# 使用 - 内存友好
for chunk in read_large_csv(Path('huge_file.csv')):
process_chunk(chunk) # 每次只处理1000行
3.2 缓存装饰器
python
from functools import lru_cache, wraps
import hashlib
import pickle
from pathlib import Path
class PersistentCache:
    """Decorator object that memoizes function results as pickle files on disk.

    Results are keyed by the function's qualified name plus the ``repr`` of
    its arguments, so arguments need a stable, value-based ``repr``.

    NOTE(security): cached results are read back with ``pickle.load``, which
    can execute arbitrary code. Only point ``cache_dir`` at a trusted location.
    """

    def __init__(self, cache_dir: Path):
        """Remember ``cache_dir`` and create it (with parents) if missing."""
        self.cache_dir = cache_dir
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def __call__(self, func):
        """Wrap ``func`` so each distinct call is computed once and then read from disk."""
        # Qualify by module and qualname so that same-named functions from
        # different modules or classes do not share cache entries.
        func_id = f"{func.__module__}.{func.__qualname__}"

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Build the cache key.
            key_data = f"{func_id}{args}{sorted(kwargs.items())}"
            key = hashlib.md5(key_data.encode()).hexdigest()
            cache_file = self.cache_dir / f"{key}.pkl"

            # Try the cache first.
            if cache_file.exists():
                try:
                    with cache_file.open('rb') as f:
                        return pickle.load(f)
                except (pickle.UnpicklingError, EOFError):
                    # Truncated or corrupt entry: deliberately treat it as a
                    # miss and recompute below.
                    pass

            # Compute, then publish via a temp file so an interrupted write
            # never leaves a half-written entry under the final name.
            result = func(*args, **kwargs)
            tmp_file = cache_file.with_suffix('.tmp')
            with tmp_file.open('wb') as f:
                pickle.dump(result, f)
            tmp_file.replace(cache_file)
            return result

        return wrapper
# 使用
cache = PersistentCache(Path('./cache'))
@cache
def expensive_computation(n: int) -> int:
    """Return the sum of ``range(n)``, announcing each real computation."""
    print(f"计算 {n}...")
    total = sum(range(n))
    return total
print(expensive_computation(1000000)) # 第一次计算
print(expensive_computation(1000000)) # 直接从缓存读取
3.3 数据验证装饰器
python
from functools import wraps
from typing import Any, Callable, TypeVar, Union
import numbers
T = TypeVar('T')
def validate_input(**validators):
    """Build a decorator that validates selected call arguments.

    Args:
        **validators: Maps a parameter name to a predicate. The predicate
            receives the argument value and returns a truthy result when the
            value is acceptable.

    Returns:
        A decorator. The wrapped function raises ``ValueError`` when a
        supplied argument fails its predicate. Arguments the caller did not
        supply (including defaults) are not checked, and validator names that
        match no supplied argument are ignored.
    """
    import inspect  # local import keeps this snippet self-contained

    def decorator(func: Callable) -> Callable:
        # Resolve the signature once, at decoration time.
        signature = inspect.signature(func)
        var_keyword = next(
            (name for name, param in signature.parameters.items()
             if param.kind is inspect.Parameter.VAR_KEYWORD),
            None,
        )

        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                bound = signature.bind_partial(*args, **kwargs)
            except TypeError:
                # Malformed call: let the function raise its own error.
                return func(*args, **kwargs)

            supplied = bound.arguments
            # Keywords absorbed by a ``**kwargs`` parameter can be validated too.
            extras = supplied.get(var_keyword, {}) if var_keyword else {}

            for param_name, validator in validators.items():
                if param_name in supplied:
                    value = supplied[param_name]
                elif param_name in extras:
                    value = extras[param_name]
                else:
                    continue
                # Run the check.
                if not validator(value):
                    raise ValueError(f"参数 {param_name}={value} 验证失败")
            return func(*args, **kwargs)

        return wrapper

    return decorator
# 验证器函数
def positive(x: Union[int, float]) -> bool:
    """Return ``True`` when ``x`` is a real number strictly greater than zero.

    Non-numeric values and complex numbers (which cannot be ordered) yield
    ``False`` rather than raising.
    """
    return isinstance(x, numbers.Real) and x > 0
def between(min_val, max_val):
    """Return a predicate that accepts values in ``[min_val, max_val]``.

    Both bounds are inclusive.
    """
    def validator(x):
        in_range = min_val <= x <= max_val
        return in_range

    return validator
# 使用
@validate_input(age=positive, score=between(0, 100))
def process_student(name: str, age: int, score: float):
return f"{name}: age={age}, score={score}"
# 正常
print(process_student('Alice', 25, 95.5))
# 异常: ValueError: 参数 age=-5 验证失败
# process_student('Bob', -5, 90)
四、面向对象高级技巧
4.1 属性描述符
python
class ValidatedAttribute:
    """Descriptor that runs an optional predicate before storing a value.

    The value lives on the instance under the attribute name prefixed with
    an underscore; reading an attribute that was never set returns ``None``.
    """

    def __init__(self, validator=None):
        self.validator = validator
        self.name = None

    def __set_name__(self, owner, name):
        # Private slot used for storage on each instance.
        self.name = "_" + name

    def __get__(self, obj, objtype=None):
        # Class-level access returns the descriptor itself.
        return self if obj is None else getattr(obj, self.name, None)

    def __set__(self, obj, value):
        check = self.validator
        if check and not check(value):
            raise ValueError(f"Invalid value: {value}")
        setattr(obj, self.name, value)
class Person:
    """Example record whose ``name`` and ``age`` are validated on assignment."""

    # A non-empty string.
    name = ValidatedAttribute(lambda value: isinstance(value, str) and len(value) > 0)
    # An integer strictly between 0 and 150.
    age = ValidatedAttribute(lambda value: isinstance(value, int) and 0 < value < 150)

    def __init__(self, name, age):
        self.name, self.age = name, age
# 使用
p = Person("Alice", 30)
print(p.name, p.age) # Alice 30
# p.age = 200 # ValueError: Invalid value: 200
4.2 单例模式的优雅实现
python
from functools import wraps
def singleton(cls):
    """Class decorator that creates one instance and reuses it for every call.

    The arguments of the first call construct the instance; arguments of
    later calls are ignored. The decorated name refers to a factory function,
    not to the class itself.
    """
    instances = {}

    @wraps(cls)
    def wrapper(*args, **kwargs):
        if cls in instances:
            return instances[cls]
        created = cls(*args, **kwargs)
        instances[cls] = created
        return created

    return wrapper
@singleton
class DatabaseConnection:
    """Example singleton holding a placeholder connection object."""

    def __init__(self):
        self.connection = self._connect()

    def _connect(self):
        """Announce the connection attempt and return the placeholder handle."""
        print("建立数据库连接...")
        handle = "connection_object"
        return handle

    def query(self, sql):
        """Return a string describing the statement that would be executed."""
        return f"执行: {sql}"
# 使用 - 只有一个实例
db1 = DatabaseConnection() # 建立数据库连接...
db2 = DatabaseConnection() # 直接返回现有实例
print(db1 is db2) # True
4.3 上下文管理器的高级用法
python
from contextlib import contextmanager
import time
from pathlib import Path
@contextmanager
def timed_operation(name: str):
    """Print a start line and, on exit, the elapsed time of the ``with`` body.

    The completion line is printed even when the body raises.

    Args:
        name: Label included in both printed lines.
    """
    print(f"开始: {name}")
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the system clock is adjusted.
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        print(f"完成: {name} 耗时 {elapsed:.2f}秒")
@contextmanager
def atomic_write(file_path: Path, mode='w'):
    """Write to a sibling temp file and move it over ``file_path`` on success.

    Args:
        file_path: Final destination of the data.
        mode: Mode used to open the temporary file.

    Yields:
        The open temporary file object to write to.

    If the ``with`` body raises, the temporary file is removed, any existing
    ``file_path`` is left untouched, and the exception propagates.
    """
    temp_path = file_path.with_suffix(file_path.suffix + '.tmp')
    try:
        with temp_path.open(mode) as f:
            yield f
        # replace() overwrites an existing target on every platform, whereas
        # rename() fails on Windows when the destination already exists.
        temp_path.replace(file_path)
    except BaseException:
        # BaseException so the temp file is also cleaned up on
        # KeyboardInterrupt / SystemExit, not just ordinary errors.
        temp_path.unlink(missing_ok=True)
        raise
# 使用
with timed_operation("数据处理"):
# 执行耗时操作
time.sleep(1)
with atomic_write(Path('important.txt')) as f:
f.write("重要数据")
# 如果这里出错,原文件不会被覆盖
五、调试和日志技巧
5.1 智能打印调试
python
from functools import wraps
import inspect
def debug(func):
    """Decorator that prints each call's bound arguments and its return value.

    Defaults are included in the printed argument list. Nothing is printed
    for the result when the wrapped function raises.
    """
    # The signature never changes, so resolve it once instead of per call.
    sig = inspect.signature(func)

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Map positional and keyword arguments onto parameter names.
        bound_args = sig.bind(*args, **kwargs)
        bound_args.apply_defaults()

        # Report the call.
        args_str = ', '.join(f"{k}={v!r}" for k, v in bound_args.arguments.items())
        print(f"🔍 调用: {func.__name__}({args_str})")

        # Run the function and report the result.
        result = func(*args, **kwargs)
        print(f"✅ 返回: {result!r}")
        return result

    return wrapper
# 使用
@debug
def calculate(a, b, c=10):
    """Return ``a + b * c``."""
    product = b * c
    return a + product
calculate(5, 3) # 自动打印调用信息
# 🔍 调用: calculate(a=5, b=3, c=10)
# ✅ 返回: 35
5.2 结构化日志
python
import logging
from pathlib import Path
from datetime import datetime
class StructuredLogger:
    """Thin wrapper around ``logging`` that appends keyword context to messages.

    Messages go to a per-day file named ``YYYYMMDD.log`` inside ``log_dir``.
    """

    def __init__(self, name: str, log_dir: Path):
        """Configure the named logger to write to today's file in ``log_dir``.

        Args:
            name: Logger name passed to ``logging.getLogger``.
            log_dir: Directory for log files; created (with parents) if missing.
        """
        self.logger = logging.getLogger(name)
        self.log_dir = log_dir
        self.log_dir.mkdir(parents=True, exist_ok=True)

        # File handler for today's log.
        log_file = log_dir / f"{datetime.now():%Y%m%d}.log"
        target = log_file.resolve()
        # getLogger returns the same object for the same name, so adding a
        # handler on every instantiation would duplicate each log line.
        already_attached = any(
            isinstance(existing, logging.FileHandler)
            and Path(existing.baseFilename).resolve() == target
            for existing in self.logger.handlers
        )
        if not already_attached:
            # Explicit UTF-8 so non-ASCII messages do not depend on the locale.
            handler = logging.FileHandler(log_file, encoding='utf-8')
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

    def log(self, level: str, message: str, **kwargs):
        """Log ``message`` at ``level``, appending ``kwargs`` as `` | {...}`` when given.

        ``level`` is lower-cased and looked up as a method on the logger.
        """
        extra = f" | {kwargs}" if kwargs else ""
        getattr(self.logger, level.lower())(f"{message}{extra}")

    def info(self, message: str, **kwargs):
        """Log ``message`` at INFO level."""
        self.log('INFO', message, **kwargs)

    def error(self, message: str, **kwargs):
        """Log ``message`` at ERROR level."""
        self.log('ERROR', message, **kwargs)
# 使用
logger = StructuredLogger('myapp', Path('./logs'))
logger.info('用户登录', user_id=123, ip='192.168.1.1')
logger.error('数据库连接失败', error='timeout', retry=3)
六、性能优化技巧
6.1 懒加载属性
python
class LazyProperty:
    """Descriptor that computes a value on first access and caches it.

    The computed value is stored on the instance under the wrapped function's
    name, so later lookups find the instance attribute and bypass this
    descriptor.
    """

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        # Class-level access returns the descriptor itself.
        if obj is None:
            return self
        computed = self.func(obj)
        # Cache on the instance.
        setattr(obj, self.name, computed)
        return computed
class DataAnalyzer:
    """Example consumer of ``LazyProperty``: loads and processes a file on demand."""

    def __init__(self, data_path):
        self.data_path = data_path

    @LazyProperty
    def data(self):
        """Text content of ``data_path``, read on first access."""
        print("加载数据...")
        text = self.data_path.read_text()
        return text

    @LazyProperty
    def processed_data(self):
        """Upper-cased ``data``, computed on first access."""
        print("处理数据...")
        raw = self.data
        return raw.upper()
# 使用
analyzer = DataAnalyzer(Path('large_file.txt'))
# data 和 processed_data 只在第一次访问时才计算
print("对象已创建,但还未加载数据")
print(analyzer.data) # 第一次访问:加载数据...
print(analyzer.data) # 第二次访问:直接返回缓存
6.2 使用 __slots__ 优化内存
python
# 传统类 - 占用内存大
class NormalPoint:
    """Plain 3-D point; attributes are stored in the per-instance ``__dict__``."""

    def __init__(self, x, y, z):
        self.x, self.y, self.z = x, y, z
# 使用 __slots__ - 节省内存(约50-70%)
class OptimizedPoint:
    """3-D point using ``__slots__``: no per-instance ``__dict__``, fixed attributes."""

    # Only these attribute names may be assigned.
    __slots__ = ('x', 'y', 'z')

    def __init__(self, x, y, z):
        self.x, self.y, self.z = x, y, z
# 内存对比
import sys
normal = NormalPoint(1, 2, 3)
optimized = OptimizedPoint(1, 2, 3)
# sys.getsizeof is shallow: for a regular instance it does not include the
# separate __dict__ that holds the attributes, so add it explicitly. The
# slotted instance has no __dict__; its attribute slots are part of the object.
normal_total = sys.getsizeof(normal) + sys.getsizeof(normal.__dict__)
print(f"普通类: {normal_total} bytes")  # instance + its __dict__
print(f"优化类: {sys.getsizeof(optimized)} bytes")  # instance only (slots inline)
七、项目组织技巧
7.1 动态导入
python
import importlib
from pathlib import Path
class PluginManager:
    """Discover, instantiate and run plugin classes from ``*_plugin.py`` files."""

    def __init__(self, plugin_dir: Path):
        """Remember ``plugin_dir``; nothing is loaded until ``load_plugins``."""
        self.plugin_dir = plugin_dir
        self.plugins = {}

    def load_plugins(self):
        """Import every ``*_plugin.py`` file in ``plugin_dir`` and register its plugins.

        A plugin is a class with a truthy ``is_plugin`` attribute. It is
        instantiated without arguments and stored in ``self.plugins`` under
        the module's file stem; if a module defines several plugin classes,
        the last one (in ``dir()`` order) is kept.

        NOTE: importing a plugin file executes its code.
        """
        # ``import importlib`` alone does not guarantee that the ``util``
        # submodule is loaded, so import it explicitly.
        import importlib.util

        for py_file in sorted(self.plugin_dir.glob('*_plugin.py')):
            module_name = py_file.stem
            spec = importlib.util.spec_from_file_location(module_name, py_file)
            if spec is None or spec.loader is None:
                continue
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            # Look for plugin classes.
            for attr_name in dir(module):
                attr = getattr(module, attr_name)
                # Classes only: an arbitrary object that happens to expose
                # ``is_plugin`` must not be called.
                if isinstance(attr, type) and getattr(attr, 'is_plugin', False):
                    self.plugins[module_name] = attr()

    def run_plugins(self, data):
        """Call ``process(data)`` on every loaded plugin.

        Returns:
            Dict mapping each plugin's module name to its result.
        """
        return {name: plugin.process(data) for name, plugin in self.plugins.items()}
# 插件定义示例
def plugin(cls):
    """Class decorator that flags ``cls`` as a plugin.

    Sets ``cls.is_plugin`` to ``True`` and returns the same class.
    """
    setattr(cls, 'is_plugin', True)
    return cls
@plugin
class MyPlugin:
    """Example plugin that prefixes its input with a fixed label."""

    def process(self, data):
        """Return ``data`` formatted into the labelled result string."""
        labelled = f"处理: {data}"
        return labelled
7.2 环境感知的路径管理
python
import sys
from pathlib import Path
class ProjectPaths:
    """Resolve project directories according to the detected runtime.

    Three cases are distinguished: a frozen executable (``sys.frozen`` set),
    a test run (``pytest`` or ``unittest`` already imported), and everything
    else.
    """

    def __init__(self):
        # Detect the runtime environment.
        self.is_frozen = getattr(sys, 'frozen', False)
        self.is_test = any(name in sys.modules for name in ('pytest', 'unittest'))

        # Pick the base directory.
        if self.is_frozen:
            # Frozen executable: directory of the executable.
            base = Path(sys.executable).parent
        elif self.is_test:
            # Test run: a ``test_tmp`` folder under the working directory.
            base = Path.cwd() / 'test_tmp'
        else:
            # Otherwise: two levels above this file.
            base = Path(__file__).resolve().parent.parent
        self.base_dir = base

        # Derived directories.
        self.data_dir = base / 'data'
        self.log_dir = base / 'logs'
        self.cache_dir = base / 'cache'
        self.output_dir = base / 'output'

        # Create data, log and cache directories, except during test runs.
        if not self.is_test:
            for required in (self.data_dir, self.log_dir, self.cache_dir):
                required.mkdir(parents=True, exist_ok=True)

    def __repr__(self):
        return f"ProjectPaths(base={self.base_dir})"
# 使用
paths = ProjectPaths()
print(f"数据目录: {paths.data_dir}")
print(f"运行环境: {'打包' if paths.is_frozen else '开发'}")
这些技巧涵盖了从配置管理到性能优化的各个方面,可以根据项目需求选择性使用。记住:技巧是为解决问题服务的,不要过度设计!