DeepSeek辅助测试三种ODS电子表格写入程序

ODS是openoffice的电子表格格式,rusty_sheet插件也能读取,需要造一些数据,想知道有哪些程序能高效生成,在pypi上搜索发现,如下三种:

  1. odswriter
  2. pyexcel-ods3
  3. stream-write-ods

前两种是内存一次写入的,后一种是流式的。

以下是整合xlsx、xlsb、ODS三种格式的写入程序

python 复制代码
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import time
import os
from decimal import Decimal
from collections import OrderedDict

# 尝试导入各种格式的库
try:
    from pyxlsbwriter import XlsbWriter
    XLSB_AVAILABLE = True
except ImportError:
    XLSB_AVAILABLE = False
    print("警告: pyxlsbwriter 未安装,无法生成xlsb格式文件")

try:
    import odswriter as ods
    ODSWRITER_AVAILABLE = True
except ImportError:
    ODSWRITER_AVAILABLE = False
    print("警告: odswriter 未安装,无法使用odswriter生成ods格式文件")

try:
    from pyexcel_ods3 import save_data
    PYEXCEL_ODS_AVAILABLE = True
except ImportError:
    PYEXCEL_ODS_AVAILABLE = False
    print("警告: pyexcel-ods3 未安装,无法使用pyexcel_ods3生成ods格式文件")

try:
    from stream_write_ods import stream_write_ods
    STREAM_ODS_AVAILABLE = True
except ImportError:
    STREAM_ODS_AVAILABLE = False
    print("警告: stream-write-ods 未安装,无法使用流式ODS写入")

def generate_stock_data(date, num_rows=10000):
    """
    生成指定日期的模拟股市行情数据
    
    Args:
        date: 日期 (datetime对象)
        num_rows: 数据行数
    
    Returns:
        tuple: (data_list, data_df) - 列表格式和DataFrame格式的数据
    """
    # 生成股票代码列表
    stock_codes = [f"STK{str(i).zfill(5)}" for i in range(1, 1001)]
    
    # 表头
    headers = ["股票代码", "时间", "开盘价", "最高价", "最低价", "收盘价", 
               "前收盘价", "涨跌额", "涨跌幅(%)", "成交量(股)", "成交额(元)", "换手率(%)"]
    
    data_list = [headers]
    data_dict = {header: [] for header in headers}
    
    for i in range(num_rows):
        stock_code = random.choice(stock_codes)
        
        # 生成时间 (从9:30到15:00)
        hour = random.randint(9, 14)
        minute = random.randint(0, 59)
        if hour == 9:
            minute = random.randint(30, 59)
        elif hour == 14:
            minute = random.randint(0, 57)
        
        timestamp = datetime(date.year, date.month, date.day, hour, minute, 
                            random.randint(0, 59))
        
        # 生成价格数据
        base_price = random.uniform(5, 500)
        open_price = round(base_price, 2)
        high_price = round(open_price * random.uniform(1.0, 1.1), 2)
        low_price = round(open_price * random.uniform(0.9, 1.0), 2)
        close_price = round(random.uniform(low_price, high_price), 2)
        
        # 计算涨跌幅
        prev_close = round(open_price * random.uniform(0.95, 1.05), 2)
        change = round(close_price - prev_close, 2)
        change_percent = round((change / prev_close) * 100, 2)
        
        # 生成成交量数据
        volume = random.randint(10000, 10000000)
        amount = round(volume * close_price, 2)
        
        # 换手率
        turnover_rate = round(random.uniform(0.1, 15.0), 2)
        
        # 列表格式数据
        row_list = [
            stock_code,
            timestamp,
            open_price,
            high_price,
            low_price,
            close_price,
            prev_close,
            change,
            change_percent,
            volume,
            amount,
            turnover_rate
        ]
        
        data_list.append(row_list)
        
        # DataFrame格式数据
        for header, value in zip(headers, row_list):
            data_dict[header].append(value)
    
    # 按时间排序 (跳过表头)
    data_list[1:] = sorted(data_list[1:], key=lambda x: x[1])
    
    # 创建DataFrame
    df = pd.DataFrame(data_dict)
    df = df.sort_values('时间').reset_index(drop=True)
    
    return data_list, df

def get_sheets_generator(data_dict):
    """
    生成流式ODS所需的sheets生成器
    
    Args:
        data_dict: 字典,key为sheet名,value为数据列表
    
    Yields:
        tuple: (sheet_name, headers, rows_generator)
    """
    for sheet_name, data in data_dict.items():
        headers = data[0]  # 第一行是表头
        rows = data[1:]    # 剩余行是数据
        
        def rows_generator():
            for row in rows:
                # 处理数据类型以确保兼容性
                processed_row = []
                for cell in row:
                    if isinstance(cell, (int, float)):
                        processed_row.append(cell)
                    elif isinstance(cell, datetime):
                        processed_row.append(cell)
                    elif isinstance(cell, Decimal):
                        processed_row.append(float(cell))
                    else:
                        processed_row.append(str(cell) if cell is not None else "")
                yield processed_row
        
        yield sheet_name, headers, rows_generator()

def save_as_ods_stream(filename, data_dict):
    """使用stream_write_ods保存为ODS格式"""
    if not STREAM_ODS_AVAILABLE:
        raise ImportError("stream-write-ods 未安装")
    
    sheets_generator = get_sheets_generator(data_dict)
    ods_chunks = stream_write_ods(sheets_generator)
    
    with open(filename, 'wb') as f:
        for chunk in ods_chunks:
            f.write(chunk)

def save_as_xlsb(filename, data_dict, compression_level=6):
    """使用pyxlsbwriter保存为xlsb格式"""
    if not XLSB_AVAILABLE:
        raise ImportError("pyxlsbwriter 未安装")
    
    with XlsbWriter(filename, compressionLevel=compression_level) as writer:
        for sheet_name, data in data_dict.items():
            writer.add_sheet(sheet_name)
            writer.write_sheet(data)

def save_as_ods_odswriter(filename, data_dict):
    """使用odswriter保存为ods格式"""
    if not ODSWRITER_AVAILABLE:
        raise ImportError("odswriter 未安装")
    
    with ods.writer(open(filename, "wb")) as odsfile:
        for sheet_name, data in data_dict.items():
            sheet = odsfile.new_sheet(sheet_name)
            for row in data:
                # 处理数据类型以确保兼容性
                processed_row = []
                for cell in row:
                    if isinstance(cell, (int, float)):
                        processed_row.append(cell)
                    elif isinstance(cell, datetime):
                        processed_row.append(cell)
                    elif isinstance(cell, Decimal):
                        processed_row.append(float(cell))
                    else:
                        processed_row.append(str(cell) if cell is not None else "")
                sheet.writerow(processed_row)

def save_as_ods_pyexcel(filename, data_dict):
    """使用pyexcel_ods3保存为ods格式"""
    if not PYEXCEL_ODS_AVAILABLE:
        raise ImportError("pyexcel_ods3 未安装")
    
    # 转换为OrderedDict格式
    ods_data = OrderedDict()
    for sheet_name, data in data_dict.items():
        ods_data[sheet_name] = data
    
    save_data(filename, ods_data)

def save_as_xlsx(filename, data_dict):
    """使用pandas保存为xlsx格式"""
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        for sheet_name, data in data_dict.items():
            # 如果是列表格式,转换为DataFrame
            if isinstance(data, list):
                headers = data[0]
                rows = data[1:]
                df = pd.DataFrame(rows, columns=headers)
            else:
                df = data
            df.to_excel(writer, sheet_name=sheet_name, index=False)
            
            # 调整列宽
            worksheet = writer.sheets[sheet_name]
            for idx, col in enumerate(df.columns):
                column_width = max(df[col].astype(str).map(len).max(), len(col)) + 2
                worksheet.column_dimensions[chr(65 + idx)].width = min(column_width, 20)


def compare_all_ods_libraries(filename, num_sheets=3, rows_per_sheet=100):
    """比较所有可用的ODS库的性能"""
    available_libs = []
    if STREAM_ODS_AVAILABLE:
        available_libs.append(('stream_ods', save_as_ods_stream))
    if ODSWRITER_AVAILABLE:
        available_libs.append(('odswriter', save_as_ods_odswriter))
    if PYEXCEL_ODS_AVAILABLE:
        available_libs.append(('pyexcel_ods3', save_as_ods_pyexcel))
    
    if not available_libs:
        print("没有可用的ODS库")
        return
    
    # 生成测试数据
    test_data = {}
    for i in range(num_sheets):
        sheet_name = f"Sheet{i+1}"
        data_list, _ = generate_stock_data(datetime.now(), rows_per_sheet)
        test_data[sheet_name] = data_list
    
    print(f"ODS库性能比较: {num_sheets}个sheet, 每个{rows_per_sheet}行")
    print("-" * 50)
    
    results = []
    for lib_name, save_func in available_libs:
        test_file = f"test_{lib_name}.ods"
        try:
            start_time = time.time()
            save_func(test_file, test_data)
            save_time = time.time() - start_time
            file_size = os.path.getsize(test_file)
            
            results.append((lib_name, save_time, file_size))
            print(f"{lib_name:15} - 耗时: {save_time:6.2f}秒, 文件大小: {file_size:8}字节")
            
            # 清理测试文件
            try:
                #os.remove(test_file)
                pass
            except:
                pass
                
        except Exception as e:
            print(f"{lib_name:15} - 失败: {e}")
    
    if results:
        # 找出最快的库
        fastest = min(results, key=lambda x: x[1])
        print(f"\n最快的库: {fastest[0]} ({fastest[1]:.2f}秒)")

if __name__ == "__main__":
    #main()
    compare_all_ods_libraries("performance_test.ods", num_sheets=20, rows_per_sheet=1000)

测试结果

bash 复制代码
python odss.py
ODS库性能比较: 20个sheet, 每个1000行
--------------------------------------------------
stream_ods      - 耗时:   1.42秒, 文件大小:  1744087字节
odswriter       - 耗时:   4.56秒, 文件大小: 27465194字节
pyexcel_ods3    - 耗时:   3.02秒, 文件大小:  1091255字节

最快的库: stream_ods (1.42秒)

odswriter库基本上没有压缩,所以文件较大,stream_ods和pyexcel_ods3压缩率高的慢一些。

相关推荐
小北方城市网3 分钟前
第 6 课:云原生架构终极落地|K8s 全栈编排与高可用架构设计实战
大数据·人工智能·python·云原生·架构·kubernetes·geo
智航GIS6 分钟前
10.1 网站防爬与伪装策略
python
belldeep13 分钟前
python:pyTorch 入门教程
pytorch·python·ai·torch
YJlio15 分钟前
Registry Usage (RU) 学习笔记(15.5):注册表内存占用体检与 Hive 体量分析
服务器·windows·笔记·python·学习·tcp/ip·django
奔波霸的伶俐虫16 分钟前
redisTemplate.opsForList()里面方法怎么用
java·开发语言·数据库·python·sql
longze_723 分钟前
生成式UI与未来AI交互变革
人工智能·python·ai·ai编程·cursor·蓝湖
weixin_4380774926 分钟前
CS336 Assignment 4 (data): Filtering Language Modeling Data 翻译和实现
人工智能·python·语言模型·自然语言处理
小郭团队27 分钟前
未来PLC会消失吗?会被嵌入式系统取代吗?
c语言·人工智能·python·嵌入式硬件·架构
yesyesido27 分钟前
智能文件格式转换器:文本/Excel与CSV无缝互转的在线工具
开发语言·python·excel
王夏奇31 分钟前
python在汽车电子行业中的应用1-基础知识概念
开发语言·python·汽车