DeepSeek辅助测试三种ODS电子表格写入程序

ODS是openoffice的电子表格格式,rusty_sheet插件也能读取,需要造一些数据,想知道有哪些程序能高效生成,在pypi上搜索发现,如下三种:

  1. odswriter
  2. pyexcel-ods3
  3. stream-write-ods

前两种是内存一次写入的,后一种是流式的。

以下是整合xlsx、xlsb、ODS三种格式的写入程序

python 复制代码
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import time
import os
from decimal import Decimal
from collections import OrderedDict

# 尝试导入各种格式的库
try:
    from pyxlsbwriter import XlsbWriter
    XLSB_AVAILABLE = True
except ImportError:
    XLSB_AVAILABLE = False
    print("警告: pyxlsbwriter 未安装,无法生成xlsb格式文件")

try:
    import odswriter as ods
    ODSWRITER_AVAILABLE = True
except ImportError:
    ODSWRITER_AVAILABLE = False
    print("警告: odswriter 未安装,无法使用odswriter生成ods格式文件")

try:
    from pyexcel_ods3 import save_data
    PYEXCEL_ODS_AVAILABLE = True
except ImportError:
    PYEXCEL_ODS_AVAILABLE = False
    print("警告: pyexcel-ods3 未安装,无法使用pyexcel_ods3生成ods格式文件")

try:
    from stream_write_ods import stream_write_ods
    STREAM_ODS_AVAILABLE = True
except ImportError:
    STREAM_ODS_AVAILABLE = False
    print("警告: stream-write-ods 未安装,无法使用流式ODS写入")

def generate_stock_data(date, num_rows=10000):
    """
    生成指定日期的模拟股市行情数据
    
    Args:
        date: 日期 (datetime对象)
        num_rows: 数据行数
    
    Returns:
        tuple: (data_list, data_df) - 列表格式和DataFrame格式的数据
    """
    # 生成股票代码列表
    stock_codes = [f"STK{str(i).zfill(5)}" for i in range(1, 1001)]
    
    # 表头
    headers = ["股票代码", "时间", "开盘价", "最高价", "最低价", "收盘价", 
               "前收盘价", "涨跌额", "涨跌幅(%)", "成交量(股)", "成交额(元)", "换手率(%)"]
    
    data_list = [headers]
    data_dict = {header: [] for header in headers}
    
    for i in range(num_rows):
        stock_code = random.choice(stock_codes)
        
        # 生成时间 (从9:30到15:00)
        hour = random.randint(9, 14)
        minute = random.randint(0, 59)
        if hour == 9:
            minute = random.randint(30, 59)
        elif hour == 14:
            minute = random.randint(0, 57)
        
        timestamp = datetime(date.year, date.month, date.day, hour, minute, 
                            random.randint(0, 59))
        
        # 生成价格数据
        base_price = random.uniform(5, 500)
        open_price = round(base_price, 2)
        high_price = round(open_price * random.uniform(1.0, 1.1), 2)
        low_price = round(open_price * random.uniform(0.9, 1.0), 2)
        close_price = round(random.uniform(low_price, high_price), 2)
        
        # 计算涨跌幅
        prev_close = round(open_price * random.uniform(0.95, 1.05), 2)
        change = round(close_price - prev_close, 2)
        change_percent = round((change / prev_close) * 100, 2)
        
        # 生成成交量数据
        volume = random.randint(10000, 10000000)
        amount = round(volume * close_price, 2)
        
        # 换手率
        turnover_rate = round(random.uniform(0.1, 15.0), 2)
        
        # 列表格式数据
        row_list = [
            stock_code,
            timestamp,
            open_price,
            high_price,
            low_price,
            close_price,
            prev_close,
            change,
            change_percent,
            volume,
            amount,
            turnover_rate
        ]
        
        data_list.append(row_list)
        
        # DataFrame格式数据
        for header, value in zip(headers, row_list):
            data_dict[header].append(value)
    
    # 按时间排序 (跳过表头)
    data_list[1:] = sorted(data_list[1:], key=lambda x: x[1])
    
    # 创建DataFrame
    df = pd.DataFrame(data_dict)
    df = df.sort_values('时间').reset_index(drop=True)
    
    return data_list, df

def get_sheets_generator(data_dict):
    """
    生成流式ODS所需的sheets生成器
    
    Args:
        data_dict: 字典,key为sheet名,value为数据列表
    
    Yields:
        tuple: (sheet_name, headers, rows_generator)
    """
    for sheet_name, data in data_dict.items():
        headers = data[0]  # 第一行是表头
        rows = data[1:]    # 剩余行是数据
        
        def rows_generator():
            for row in rows:
                # 处理数据类型以确保兼容性
                processed_row = []
                for cell in row:
                    if isinstance(cell, (int, float)):
                        processed_row.append(cell)
                    elif isinstance(cell, datetime):
                        processed_row.append(cell)
                    elif isinstance(cell, Decimal):
                        processed_row.append(float(cell))
                    else:
                        processed_row.append(str(cell) if cell is not None else "")
                yield processed_row
        
        yield sheet_name, headers, rows_generator()

def save_as_ods_stream(filename, data_dict):
    """使用stream_write_ods保存为ODS格式"""
    if not STREAM_ODS_AVAILABLE:
        raise ImportError("stream-write-ods 未安装")
    
    sheets_generator = get_sheets_generator(data_dict)
    ods_chunks = stream_write_ods(sheets_generator)
    
    with open(filename, 'wb') as f:
        for chunk in ods_chunks:
            f.write(chunk)

def save_as_xlsb(filename, data_dict, compression_level=6):
    """使用pyxlsbwriter保存为xlsb格式"""
    if not XLSB_AVAILABLE:
        raise ImportError("pyxlsbwriter 未安装")
    
    with XlsbWriter(filename, compressionLevel=compression_level) as writer:
        for sheet_name, data in data_dict.items():
            writer.add_sheet(sheet_name)
            writer.write_sheet(data)

def save_as_ods_odswriter(filename, data_dict):
    """使用odswriter保存为ods格式"""
    if not ODSWRITER_AVAILABLE:
        raise ImportError("odswriter 未安装")
    
    with ods.writer(open(filename, "wb")) as odsfile:
        for sheet_name, data in data_dict.items():
            sheet = odsfile.new_sheet(sheet_name)
            for row in data:
                # 处理数据类型以确保兼容性
                processed_row = []
                for cell in row:
                    if isinstance(cell, (int, float)):
                        processed_row.append(cell)
                    elif isinstance(cell, datetime):
                        processed_row.append(cell)
                    elif isinstance(cell, Decimal):
                        processed_row.append(float(cell))
                    else:
                        processed_row.append(str(cell) if cell is not None else "")
                sheet.writerow(processed_row)

def save_as_ods_pyexcel(filename, data_dict):
    """使用pyexcel_ods3保存为ods格式"""
    if not PYEXCEL_ODS_AVAILABLE:
        raise ImportError("pyexcel_ods3 未安装")
    
    # 转换为OrderedDict格式
    ods_data = OrderedDict()
    for sheet_name, data in data_dict.items():
        ods_data[sheet_name] = data
    
    save_data(filename, ods_data)

def save_as_xlsx(filename, data_dict):
    """使用pandas保存为xlsx格式"""
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        for sheet_name, data in data_dict.items():
            # 如果是列表格式,转换为DataFrame
            if isinstance(data, list):
                headers = data[0]
                rows = data[1:]
                df = pd.DataFrame(rows, columns=headers)
            else:
                df = data
            df.to_excel(writer, sheet_name=sheet_name, index=False)
            
            # 调整列宽
            worksheet = writer.sheets[sheet_name]
            for idx, col in enumerate(df.columns):
                column_width = max(df[col].astype(str).map(len).max(), len(col)) + 2
                worksheet.column_dimensions[chr(65 + idx)].width = min(column_width, 20)


def compare_all_ods_libraries(filename, num_sheets=3, rows_per_sheet=100):
    """比较所有可用的ODS库的性能"""
    available_libs = []
    if STREAM_ODS_AVAILABLE:
        available_libs.append(('stream_ods', save_as_ods_stream))
    if ODSWRITER_AVAILABLE:
        available_libs.append(('odswriter', save_as_ods_odswriter))
    if PYEXCEL_ODS_AVAILABLE:
        available_libs.append(('pyexcel_ods3', save_as_ods_pyexcel))
    
    if not available_libs:
        print("没有可用的ODS库")
        return
    
    # 生成测试数据
    test_data = {}
    for i in range(num_sheets):
        sheet_name = f"Sheet{i+1}"
        data_list, _ = generate_stock_data(datetime.now(), rows_per_sheet)
        test_data[sheet_name] = data_list
    
    print(f"ODS库性能比较: {num_sheets}个sheet, 每个{rows_per_sheet}行")
    print("-" * 50)
    
    results = []
    for lib_name, save_func in available_libs:
        test_file = f"test_{lib_name}.ods"
        try:
            start_time = time.time()
            save_func(test_file, test_data)
            save_time = time.time() - start_time
            file_size = os.path.getsize(test_file)
            
            results.append((lib_name, save_time, file_size))
            print(f"{lib_name:15} - 耗时: {save_time:6.2f}秒, 文件大小: {file_size:8}字节")
            
            # 清理测试文件
            try:
                #os.remove(test_file)
                pass
            except:
                pass
                
        except Exception as e:
            print(f"{lib_name:15} - 失败: {e}")
    
    if results:
        # 找出最快的库
        fastest = min(results, key=lambda x: x[1])
        print(f"\n最快的库: {fastest[0]} ({fastest[1]:.2f}秒)")

if __name__ == "__main__":
    #main()
    compare_all_ods_libraries("performance_test.ods", num_sheets=20, rows_per_sheet=1000)

测试结果

bash 复制代码
python odss.py
ODS库性能比较: 20个sheet, 每个1000行
--------------------------------------------------
stream_ods      - 耗时:   1.42秒, 文件大小:  1744087字节
odswriter       - 耗时:   4.56秒, 文件大小: 27465194字节
pyexcel_ods3    - 耗时:   3.02秒, 文件大小:  1091255字节

最快的库: stream_ods (1.42秒)

odswriter库基本上没有压缩,所以文件较大,stream_ods和pyexcel_ods3压缩率高的慢一些。

相关推荐
用户83562907805119 小时前
Python 操作 PDF 附件:添加、查看与管理指南
后端·python
宇宙之一粟1 天前
乐企版式文件生成平台
java·后端·python
学测绘的小杨2 天前
CompassFusion:一个从 GNSS 到 GNSS/INS 组合导航的独立工程包
python
zzzzzz3102 天前
当产品经理说这个很简单:我用Python自动化处理奇葩需求的实战指南
python·pycharm·产品经理
雪隐2 天前
个人电脑玩AI-06让5060 Ti给你打工——不光能画画,Qwen3-TTS还能学人说话,连我老板都信了!
人工智能·后端·python
兵慌码乱3 天前
面向桌面端的资产管理系统分层架构设计与核心模块实现
python·系统架构·sqlite·pyqt5·数据库设计·桌面应用开发·mvc架构
hboot3 天前
AI工程师第三课 - 机器学习基础
python·scikit-learn·kaggle
小白跃升坊3 天前
Codex 增强部署:基于 Codex++ 接入 DeepSeek
ai·ai编程·codex·deepseek·ai coding·codex++
大模型真好玩3 天前
什么是Loop Engineering?最通俗易懂的Loop Engineering核心概念
人工智能·agent·deepseek
顾林海3 天前
Agent入门阶段-编程基础-Python:流程控制
python·agent·ai编程