设计一个基于多个带标签的PL/SQL模板作为配置文件和多组参数的Python Pandas代码程序,实现根据不同的输入参数自动批量地将Oracle数据库中的数据导出为CSV和Excel文件到指定目录上,标签和多个参数(以"_"分割)为组成导出数据文件名,文件已经存在则覆盖原始文件。
该解决方案提供了灵活的配置方式和可靠的错误处理机制,能够满足批量数据导出的需求,同时保证生成文件的规范性和一致性。使用Pandas和cx_Oracle实现多模板多参数的Oracle数据导出程序:
python
import os
import cx_Oracle
import pandas as pd
import yaml
import json
from pathlib import Path
class OracleDataExporter:
def __init__(self, config_path, params_path, output_dir, db_conn_str):
"""
初始化导出器
:param config_path: SQL模板配置文件路径
:param params_path: 参数配置文件路径
:param output_dir: 输出目录
:param db_conn_str: 数据库连接字符串
"""
self.config_path = config_path
self.params_path = params_path
self.output_dir = Path(output_dir)
self.db_conn_str = db_conn_str
self.connection = None
# 创建输出目录
self.output_dir.mkdir(parents=True, exist_ok=True)
def _load_config(self):
"""加载SQL模板配置"""
with open(self.config_path) as f:
return yaml.safe_load(f)['templates']
def _load_params(self):
"""加载参数配置"""
with open(self.params_path) as f:
return json.load(f)
def _generate_filename(self, label, params, param_order):
"""
生成标准文件名
:param label: 模板标签
:param params: 参数字典
:param param_order: 参数顺序列表
"""
param_values = [str(params[key]) for key in param_order]
return f"{label}_{'_'.join(param_values)}"
def _export_files(self, df, base_filename):
"""导出数据文件"""
csv_path = self.output_dir / f"{base_filename}.csv"
excel_path = self.output_dir / f"{base_filename}.xlsx"
# 导出CSV(覆盖模式)
df.to_csv(csv_path, index=False, mode='w')
# 导出Excel(覆盖模式)
df.to_excel(excel_path, index=False, engine='openpyxl')
return csv_path, excel_path
def _validate_parameters(self, params, required_params):
"""验证参数完整性"""
missing = [p for p in required_params if p not in params]
if missing:
raise ValueError(f"Missing required parameters: {missing}")
def execute(self):
"""执行导出流程"""
# 加载配置
templates = self._load_config()
all_params = self._load_params()
try:
# 建立数据库连接
self.connection = cx_Oracle.connect(self.db_conn_str)
# 遍历所有模板
for template in templates:
label = template['label']
sql_template = template['sql']
param_order = template['params_order']
param_groups = all_params.get(label, [])
if not param_groups:
print(f"⚠️ No parameters found for template '{label}', skipping...")
continue
print(f"🔨 Processing template: {label}")
# 处理每个参数组
for params in param_groups:
try:
self._validate_parameters(params, param_order)
base_name = self._generate_filename(label, params, param_order)
print(f"🔧 Executing with parameters: {params}")
df = pd.read_sql(sql_template, self.connection, params=params)
csv_path, excel_path = self._export_files(df, base_name)
print(f"✅ Exported: {csv_path.name} and {excel_path.name}")
except Exception as e:
print(f"❌ Error processing parameters {params}: {str(e)}")
continue
finally:
if self.connection:
self.connection.close()
if __name__ == "__main__":
# 配置参数
CONFIG = {
"config_path": "config/sql_templates.yaml",
"params_path": "config/parameters.json",
"output_dir": "exports",
"db_conn_str": "username/password@hostname:port/service_name"
}
# 执行导出
exporter = OracleDataExporter(**CONFIG)
exporter.execute()
配套配置文件示例
1. SQL模板配置文件 (sql_templates.yaml)
yaml
templates:
- label: sales_report
sql: |
SELECT *
FROM sales_data
WHERE region = :region
AND year = :year
AND product_category = :category
params_order: [region, year, category]
- label: customer_list
sql: |
SELECT customer_id, name, status
FROM customers
WHERE registration_date >= :start_date
AND registration_date <= :end_date
params_order: [start_date, end_date]
2. 参数配置文件 (parameters.json)
json
{
"sales_report": [
{"region": "North", "year": 2023, "category": "Electronics"},
{"region": "South", "year": 2022, "category": "Furniture"}
],
"customer_list": [
{"start_date": "2023-01-01", "end_date": "2023-03-31"},
{"start_date": "2023-04-01", "end_date": "2023-06-30"}
]
}
关键功能说明
-
模板管理:
- 使用YAML格式定义SQL模板
- 每个模板包含唯一标签、SQL语句和参数顺序定义
- 支持多行SQL语句和命名参数
-
参数管理:
- JSON格式存储参数组
- 参数组按模板标签分组管理
- 自动验证参数完整性
-
文件生成:
- 自动生成标准化文件名(标签_参数1_参数2...)
- 同时生成CSV和Excel文件
- 自动覆盖已存在文件
-
错误处理:
- 参数完整性验证
- 数据库连接安全管理
- 异常捕获和友好提示
-
扩展性:
- 易于添加新模板和参数组
- 支持复杂的SQL查询
- 可自定义输出目录和文件命名规则
使用说明
- 安装依赖:
bash
pip install cx-Oracle pandas pyyaml openpyxl
-
配置文件结构:
project/
├── config/
│ ├── sql_templates.yaml
│ └── parameters.json
├── exports/
├── main.py -
运行程序:
python
python main.py
-
输出结果示例:
exports/
├── sales_report_North_2023_Electronics.csv
├── sales_report_North_2023_Electronics.xlsx
├── sales_report_South_2022_Furniture.csv
├── sales_report_South_2022_Furniture.xlsx
├── customer_list_2023-01-01_2023-03-31.csv
├── customer_list_2023-01-01_2023-03-31.xlsx
├── customer_list_2023-04-01_2023-06-30.csv
└── customer_list_2023-04-01_2023-06-30.xlsx