DeepSeek辅助测试三种ODS电子表格写入程序

ODS是openoffice的电子表格格式,rusty_sheet插件也能读取,需要造一些数据,想知道有哪些程序能高效生成,在pypi上搜索发现,如下三种:

  1. odswriter
  2. pyexcel-ods3
  3. stream-write-ods

前两种是内存一次写入的,后一种是流式的。

以下是整合xlsx、xlsb、ODS三种格式的写入程序

python 复制代码
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import time
import os
from decimal import Decimal
from collections import OrderedDict

# 尝试导入各种格式的库
try:
    from pyxlsbwriter import XlsbWriter
    XLSB_AVAILABLE = True
except ImportError:
    XLSB_AVAILABLE = False
    print("警告: pyxlsbwriter 未安装,无法生成xlsb格式文件")

try:
    import odswriter as ods
    ODSWRITER_AVAILABLE = True
except ImportError:
    ODSWRITER_AVAILABLE = False
    print("警告: odswriter 未安装,无法使用odswriter生成ods格式文件")

try:
    from pyexcel_ods3 import save_data
    PYEXCEL_ODS_AVAILABLE = True
except ImportError:
    PYEXCEL_ODS_AVAILABLE = False
    print("警告: pyexcel-ods3 未安装,无法使用pyexcel_ods3生成ods格式文件")

try:
    from stream_write_ods import stream_write_ods
    STREAM_ODS_AVAILABLE = True
except ImportError:
    STREAM_ODS_AVAILABLE = False
    print("警告: stream-write-ods 未安装,无法使用流式ODS写入")

def generate_stock_data(date, num_rows=10000):
    """
    生成指定日期的模拟股市行情数据
    
    Args:
        date: 日期 (datetime对象)
        num_rows: 数据行数
    
    Returns:
        tuple: (data_list, data_df) - 列表格式和DataFrame格式的数据
    """
    # 生成股票代码列表
    stock_codes = [f"STK{str(i).zfill(5)}" for i in range(1, 1001)]
    
    # 表头
    headers = ["股票代码", "时间", "开盘价", "最高价", "最低价", "收盘价", 
               "前收盘价", "涨跌额", "涨跌幅(%)", "成交量(股)", "成交额(元)", "换手率(%)"]
    
    data_list = [headers]
    data_dict = {header: [] for header in headers}
    
    for i in range(num_rows):
        stock_code = random.choice(stock_codes)
        
        # 生成时间 (从9:30到15:00)
        hour = random.randint(9, 14)
        minute = random.randint(0, 59)
        if hour == 9:
            minute = random.randint(30, 59)
        elif hour == 14:
            minute = random.randint(0, 57)
        
        timestamp = datetime(date.year, date.month, date.day, hour, minute, 
                            random.randint(0, 59))
        
        # 生成价格数据
        base_price = random.uniform(5, 500)
        open_price = round(base_price, 2)
        high_price = round(open_price * random.uniform(1.0, 1.1), 2)
        low_price = round(open_price * random.uniform(0.9, 1.0), 2)
        close_price = round(random.uniform(low_price, high_price), 2)
        
        # 计算涨跌幅
        prev_close = round(open_price * random.uniform(0.95, 1.05), 2)
        change = round(close_price - prev_close, 2)
        change_percent = round((change / prev_close) * 100, 2)
        
        # 生成成交量数据
        volume = random.randint(10000, 10000000)
        amount = round(volume * close_price, 2)
        
        # 换手率
        turnover_rate = round(random.uniform(0.1, 15.0), 2)
        
        # 列表格式数据
        row_list = [
            stock_code,
            timestamp,
            open_price,
            high_price,
            low_price,
            close_price,
            prev_close,
            change,
            change_percent,
            volume,
            amount,
            turnover_rate
        ]
        
        data_list.append(row_list)
        
        # DataFrame格式数据
        for header, value in zip(headers, row_list):
            data_dict[header].append(value)
    
    # 按时间排序 (跳过表头)
    data_list[1:] = sorted(data_list[1:], key=lambda x: x[1])
    
    # 创建DataFrame
    df = pd.DataFrame(data_dict)
    df = df.sort_values('时间').reset_index(drop=True)
    
    return data_list, df

def get_sheets_generator(data_dict):
    """
    生成流式ODS所需的sheets生成器
    
    Args:
        data_dict: 字典,key为sheet名,value为数据列表
    
    Yields:
        tuple: (sheet_name, headers, rows_generator)
    """
    for sheet_name, data in data_dict.items():
        headers = data[0]  # 第一行是表头
        rows = data[1:]    # 剩余行是数据
        
        def rows_generator():
            for row in rows:
                # 处理数据类型以确保兼容性
                processed_row = []
                for cell in row:
                    if isinstance(cell, (int, float)):
                        processed_row.append(cell)
                    elif isinstance(cell, datetime):
                        processed_row.append(cell)
                    elif isinstance(cell, Decimal):
                        processed_row.append(float(cell))
                    else:
                        processed_row.append(str(cell) if cell is not None else "")
                yield processed_row
        
        yield sheet_name, headers, rows_generator()

def save_as_ods_stream(filename, data_dict):
    """使用stream_write_ods保存为ODS格式"""
    if not STREAM_ODS_AVAILABLE:
        raise ImportError("stream-write-ods 未安装")
    
    sheets_generator = get_sheets_generator(data_dict)
    ods_chunks = stream_write_ods(sheets_generator)
    
    with open(filename, 'wb') as f:
        for chunk in ods_chunks:
            f.write(chunk)

def save_as_xlsb(filename, data_dict, compression_level=6):
    """使用pyxlsbwriter保存为xlsb格式"""
    if not XLSB_AVAILABLE:
        raise ImportError("pyxlsbwriter 未安装")
    
    with XlsbWriter(filename, compressionLevel=compression_level) as writer:
        for sheet_name, data in data_dict.items():
            writer.add_sheet(sheet_name)
            writer.write_sheet(data)

def save_as_ods_odswriter(filename, data_dict):
    """使用odswriter保存为ods格式"""
    if not ODSWRITER_AVAILABLE:
        raise ImportError("odswriter 未安装")
    
    with ods.writer(open(filename, "wb")) as odsfile:
        for sheet_name, data in data_dict.items():
            sheet = odsfile.new_sheet(sheet_name)
            for row in data:
                # 处理数据类型以确保兼容性
                processed_row = []
                for cell in row:
                    if isinstance(cell, (int, float)):
                        processed_row.append(cell)
                    elif isinstance(cell, datetime):
                        processed_row.append(cell)
                    elif isinstance(cell, Decimal):
                        processed_row.append(float(cell))
                    else:
                        processed_row.append(str(cell) if cell is not None else "")
                sheet.writerow(processed_row)

def save_as_ods_pyexcel(filename, data_dict):
    """使用pyexcel_ods3保存为ods格式"""
    if not PYEXCEL_ODS_AVAILABLE:
        raise ImportError("pyexcel_ods3 未安装")
    
    # 转换为OrderedDict格式
    ods_data = OrderedDict()
    for sheet_name, data in data_dict.items():
        ods_data[sheet_name] = data
    
    save_data(filename, ods_data)

def save_as_xlsx(filename, data_dict):
    """使用pandas保存为xlsx格式"""
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        for sheet_name, data in data_dict.items():
            # 如果是列表格式,转换为DataFrame
            if isinstance(data, list):
                headers = data[0]
                rows = data[1:]
                df = pd.DataFrame(rows, columns=headers)
            else:
                df = data
            df.to_excel(writer, sheet_name=sheet_name, index=False)
            
            # 调整列宽
            worksheet = writer.sheets[sheet_name]
            for idx, col in enumerate(df.columns):
                column_width = max(df[col].astype(str).map(len).max(), len(col)) + 2
                worksheet.column_dimensions[chr(65 + idx)].width = min(column_width, 20)


def compare_all_ods_libraries(filename, num_sheets=3, rows_per_sheet=100):
    """比较所有可用的ODS库的性能"""
    available_libs = []
    if STREAM_ODS_AVAILABLE:
        available_libs.append(('stream_ods', save_as_ods_stream))
    if ODSWRITER_AVAILABLE:
        available_libs.append(('odswriter', save_as_ods_odswriter))
    if PYEXCEL_ODS_AVAILABLE:
        available_libs.append(('pyexcel_ods3', save_as_ods_pyexcel))
    
    if not available_libs:
        print("没有可用的ODS库")
        return
    
    # 生成测试数据
    test_data = {}
    for i in range(num_sheets):
        sheet_name = f"Sheet{i+1}"
        data_list, _ = generate_stock_data(datetime.now(), rows_per_sheet)
        test_data[sheet_name] = data_list
    
    print(f"ODS库性能比较: {num_sheets}个sheet, 每个{rows_per_sheet}行")
    print("-" * 50)
    
    results = []
    for lib_name, save_func in available_libs:
        test_file = f"test_{lib_name}.ods"
        try:
            start_time = time.time()
            save_func(test_file, test_data)
            save_time = time.time() - start_time
            file_size = os.path.getsize(test_file)
            
            results.append((lib_name, save_time, file_size))
            print(f"{lib_name:15} - 耗时: {save_time:6.2f}秒, 文件大小: {file_size:8}字节")
            
            # 清理测试文件
            try:
                #os.remove(test_file)
                pass
            except:
                pass
                
        except Exception as e:
            print(f"{lib_name:15} - 失败: {e}")
    
    if results:
        # 找出最快的库
        fastest = min(results, key=lambda x: x[1])
        print(f"\n最快的库: {fastest[0]} ({fastest[1]:.2f}秒)")

if __name__ == "__main__":
    #main()
    compare_all_ods_libraries("performance_test.ods", num_sheets=20, rows_per_sheet=1000)

测试结果

bash 复制代码
python odss.py
ODS库性能比较: 20个sheet, 每个1000行
--------------------------------------------------
stream_ods      - 耗时:   1.42秒, 文件大小:  1744087字节
odswriter       - 耗时:   4.56秒, 文件大小: 27465194字节
pyexcel_ods3    - 耗时:   3.02秒, 文件大小:  1091255字节

最快的库: stream_ods (1.42秒)

odswriter库基本上没有压缩,所以文件较大,stream_ods和pyexcel_ods3压缩率高的慢一些。

相关推荐
liwulin050636 分钟前
【PYTHON-YOLOV8N】关于YOLO的推理训练图片的尺寸
开发语言·python·yolo
我送炭你添花1 小时前
Pelco KBD300A 模拟器:04+1.Python 打包详解:历史、发展与多种方式对比
python·测试工具·运维开发
yaoh.wang1 小时前
力扣(LeetCode) 27: 移除元素 - 解法思路
python·程序人生·算法·leetcode·面试·职场和发展·双指针
幸存者letp1 小时前
Python 常用方法分类大全
linux·服务器·python
2401_841495641 小时前
【自然语言处理】中文 n-gram 词模型
人工智能·python·算法·自然语言处理·n-gram·中文文本生成模型·kneser-ney平滑
嘻哈baby1 小时前
为什么越来越多的人放弃Flask转向FastAPI?
python
ss2732 小时前
Java并发编程:DelayQueue延迟订单系统
java·python·算法
JHC0000002 小时前
118. 杨辉三角
python·算法·面试
@游子2 小时前
Python类属性与魔术方法全解析
开发语言·python
yuhaiqun19892 小时前
Typora 技能进阶:从会写 Markdown 到玩转配置 + 插件高效学习笔记
经验分享·笔记·python·学习·学习方法·ai编程·markdown