Excel自动化操作详细文档04

批量处理文件

1.1 批量读取Excel文件

Python操作:

python 复制代码
import pandas as pd
import os
from pathlib import Path

# Collect every .xlsx workbook in the folder. Names starting with "~$" are
# the lock files Excel leaves next to an open workbook; they are not readable
# workbooks, so they are skipped. Sorting keeps the merge order stable.
folder_path = 'data'
all_files = sorted(
    path for path in Path(folder_path).glob('*.xlsx')
    if not path.name.startswith('~$')
)

# Read each workbook (pd.read_excel loads the first sheet by default) and tag
# every row with the name of the file it came from.
df_list = []
for file in all_files:
    df = pd.read_excel(file)
    df['来源文件'] = file.name
    df_list.append(df)

# pd.concat raises ValueError on an empty list, so the merged workbook is only
# written when at least one file was actually read.
if df_list:
    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df.to_excel('合并结果.xlsx', index=False)

print(f'共处理 {len(df_list)} 个文件')

1.2 批量转换格式

Excel转CSV:

python 复制代码
import pandas as pd
from pathlib import Path

# Convert every .xlsx workbook in folder_path into a CSV file of the same
# base name inside output_folder.
folder_path = 'excel_files'
output_folder = 'csv_files'
# parents=True allows output_folder to be a nested path; exist_ok=True makes
# reruns safe.
Path(output_folder).mkdir(parents=True, exist_ok=True)

for excel_file in Path(folder_path).glob('*.xlsx'):
    # "~$name.xlsx" is the lock file Excel keeps while a workbook is open;
    # it is not a readable workbook, so skip it instead of crashing.
    if excel_file.name.startswith('~$'):
        continue

    df = pd.read_excel(excel_file)
    csv_file = Path(output_folder) / f'{excel_file.stem}.csv'
    # utf-8-sig prepends a BOM to the file (commonly needed for Excel to open
    # the CSV as UTF-8).
    df.to_csv(csv_file, index=False, encoding='utf-8-sig')
    print(f'已转换: {excel_file.name} -> {csv_file.name}')

CSV转Excel:

python 复制代码
import pandas as pd
from pathlib import Path

# Convert every CSV file in folder_path into an .xlsx workbook of the same
# base name inside output_folder.
folder_path = 'csv_files'
output_folder = 'excel_files'

source_dir = Path(folder_path)
target_dir = Path(output_folder)
target_dir.mkdir(exist_ok=True)

for csv_file in source_dir.glob('*.csv'):
    df = pd.read_csv(csv_file)
    # Swapping the final suffix yields the same name as "<stem>.xlsx".
    excel_file = target_dir / csv_file.with_suffix('.xlsx').name
    df.to_excel(excel_file, index=False)
    print(f'已转换: {csv_file.name} -> {excel_file.name}')

1.3 批量修改文件

批量添加列:

python 复制代码
import pandas as pd
from pathlib import Path
from datetime import datetime

# Add a timestamp column (and a computed total when possible) to every
# workbook in folder_path, overwriting each file in place.
# NOTE: only the sheet returned by pd.read_excel (the first one by default) is
# written back, so any other sheets in the workbook are not preserved.
folder_path = 'data'

# Materialise the file list before the loop: the files are rewritten inside
# the same folder, so iterating a lazy glob while writing is avoided.
excel_files = sorted(
    path for path in Path(folder_path).glob('*.xlsx')
    # "~$..." files are Excel's lock files, not readable workbooks.
    if not path.name.startswith('~$')
)

for excel_file in excel_files:
    df = pd.read_excel(excel_file)

    # Stamp every row with the time this file was processed.
    df['处理时间'] = datetime.now()

    # The total is only computed when both source columns are present.
    if {'数量', '单价'}.issubset(df.columns):
        df['总价'] = df['数量'] * df['单价']

    # Overwrite the original workbook.
    df.to_excel(excel_file, index=False)
    print(f'已处理: {excel_file.name}')

1.4 批量拆分文件

按列值拆分:

python 复制代码
import pandas as pd
from pathlib import Path

df = pd.read_excel('总数据.xlsx')

# One workbook per department is written into this folder.
output_folder = Path('按部门拆分')
output_folder.mkdir(exist_ok=True)

# Characters that are not allowed in Windows file names, each mapped to "_"
# so a department such as "A/B" cannot break the output path.
_INVALID_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

for dept in df['部门'].unique():
    if pd.isna(dept):
        # NaN never compares equal to itself, so "== dept" would select no
        # rows; pick the rows with a missing department explicitly and give
        # them their own file instead of an empty "nan.xlsx".
        dept_df = df[df['部门'].isna()]
        file_stem = '未分配'
    else:
        dept_df = df[df['部门'] == dept]
        file_stem = str(dept).translate(_INVALID_FILENAME_CHARS)

    output_file = output_folder / f'{file_stem}.xlsx'
    dept_df.to_excel(output_file, index=False)
    print(f'已生成: {output_file.name}, 共 {len(dept_df)} 条记录')

按行数拆分:

python 复制代码
import pandas as pd
from pathlib import Path

df = pd.read_excel('大文件.xlsx')

# Maximum number of rows written to each output workbook.
chunk_size = 1000
output_folder = Path('拆分文件')
output_folder.mkdir(exist_ok=True)

# Walk the frame in chunk_size steps; numbering from 1 gives the same part
# numbers as "start // chunk_size + 1".
for part_no, start in enumerate(range(0, len(df), chunk_size), start=1):
    stop = start + chunk_size
    chunk_df = df.iloc[start:stop]
    output_file = output_folder / f'part_{part_no}.xlsx'
    chunk_df.to_excel(output_file, index=False)
    print(f'已生成: {output_file.name}')

自动化报表生成

2.1 生成格式化报表

Python操作:

python 复制代码
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side

import unicodedata  # stdlib; used only by _display_width below


def _display_width(value):
    """Return the approximate display width of *value* in character units.

    East Asian wide and fullwidth characters are counted as two units and
    everything else as one, so CJK text is not under-sized the way a plain
    ``len(str(value))`` would size it.
    """
    return sum(
        2 if unicodedata.east_asian_width(ch) in ('W', 'F') else 1
        for ch in str(value)
    )


# Load the raw data.
df = pd.read_excel('原始数据.xlsx')

# Aggregate per department: total sales, number of order rows and number of
# distinct customers.
summary = df.groupby('部门').agg({
    '销售额': 'sum',
    '订单数': 'count',
    '客户数': 'nunique'
}).reset_index()

# Write the summary first, then reopen it with openpyxl to apply formatting.
output_file = '销售报表.xlsx'
summary.to_excel(output_file, index=False, sheet_name='汇总')

wb = load_workbook(output_file)
ws = wb['汇总']

# Header row: white bold text on a blue fill, centred. The style objects are
# created once and shared by all header cells.
header_fill = PatternFill(start_color='4472C4', fill_type='solid')
header_font = Font(color='FFFFFF', bold=True)
header_alignment = Alignment(horizontal='center')

for cell in ws[1]:
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = header_alignment

# Thin border on every side of every cell in the used range.
thin_border = Border(
    left=Side(style='thin'),
    right=Side(style='thin'),
    top=Side(style='thin'),
    bottom=Side(style='thin')
)

for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
    for cell in row:
        cell.border = thin_border

# Size each column to its widest value plus a little padding.
for column in ws.columns:
    column_letter = column[0].column_letter
    # "is not None" (rather than truthiness) so values such as 0 still count.
    max_length = max(
        (_display_width(cell.value) for cell in column if cell.value is not None),
        default=0,
    )
    ws.column_dimensions[column_letter].width = max_length + 2

wb.save(output_file)
print(f'报表已生成: {output_file}')

2.2 生成多工作表报表

Python操作:

python 复制代码
import pandas as pd
from datetime import datetime

df = pd.read_excel('销售数据.xlsx')

# Characters Excel rejects in worksheet names, each mapped to "_".
_INVALID_SHEET_CHARS = str.maketrans({ch: '_' for ch in '[]:*?/\\'})
# Excel's limit on the length of a worksheet name.
_MAX_SHEET_NAME_LEN = 31


def _safe_sheet_name(raw_name, used_names):
    """Return a worksheet name derived from *raw_name* that Excel accepts.

    Forbidden characters are replaced with "_", the name is cut to 31
    characters, and a numeric suffix is appended when the name (compared
    case-insensitively) is already in *used_names*. The chosen name is added
    to *used_names*. Names that are already valid and unused come back
    unchanged.
    """
    base = str(raw_name).translate(_INVALID_SHEET_CHARS)[:_MAX_SHEET_NAME_LEN] or '_'
    candidate = base
    counter = 1
    while candidate.lower() in used_names:
        counter += 1
        tag = f'_{counter}'
        candidate = base[:_MAX_SHEET_NAME_LEN - len(tag)] + tag
    used_names.add(candidate.lower())
    return candidate


# One workbook per day; the date is part of the file name.
output_file = f'销售报表_{datetime.now().strftime("%Y%m%d")}.xlsx'
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:

    # Reserve the two fixed sheet names up front so a department with the same
    # name cannot be written into (and overwrite) those sheets.
    used_sheet_names = {'总览'.lower(), '月度趋势'.lower()}

    # Overview: total sales per department.
    summary = df.groupby('部门')['销售额'].sum().reset_index()
    summary.to_excel(writer, sheet_name='总览', index=False)

    # One sheet per department. Missing departments are dropped here: NaN
    # never equals itself, so the filter below would only produce an empty
    # sheet for it.
    for dept in df['部门'].dropna().unique():
        dept_df = df[df['部门'] == dept]
        sheet_name = _safe_sheet_name(dept, used_sheet_names)
        dept_df.to_excel(writer, sheet_name=sheet_name, index=False)

    # Monthly trend: total sales grouped by calendar month.
    df['日期'] = pd.to_datetime(df['日期'])
    trend = df.groupby(df['日期'].dt.to_period('M'))['销售额'].sum().reset_index()
    trend.to_excel(writer, sheet_name='月度趋势', index=False)

print(f'多工作表报表已生成: {output_file}')

2.3 生成带图表的报表

Python操作:

python 复制代码
import pandas as pd
from openpyxl import load_workbook
from openpyxl.chart import BarChart, Reference

# Prepare the data: total sales per product.
df = pd.read_excel('销售数据.xlsx')
summary = df.groupby('产品')['销售额'].sum().reset_index()

# Write the data first; the chart is added by reopening the file with openpyxl.
output_file = '销售报表_带图表.xlsx'
summary.to_excel(output_file, index=False, sheet_name='数据')

wb = load_workbook(output_file)
ws = wb['数据']

chart = BarChart()
chart.title = "产品销售额"
chart.x_axis.title = "产品"
# The y-axis title was mis-encoded in the original text; restored to "销售额".
chart.y_axis.title = "销售额"

# Column 2 holds the values; row 1 is included so the header becomes the
# series title (titles_from_data=True). Column 1, from row 2 down, holds the
# category labels.
data = Reference(ws, min_col=2, min_row=1, max_row=ws.max_row)
cats = Reference(ws, min_col=1, min_row=2, max_row=ws.max_row)
chart.add_data(data, titles_from_data=True)
chart.set_categories(cats)

# Anchor the chart's top-left corner at cell D2.
ws.add_chart(chart, "D2")

wb.save(output_file)
print(f'带图表的报表已生成: {output_file}')

定时任务

3.1 使用schedule库

Python操作:

python 复制代码
import schedule
import time
import pandas as pd
from datetime import datetime

def generate_daily_report():
    """Summarise 实时数据.xlsx by category and save it as today's daily report.

    Reads the source workbook, sums the 数量 column per 类别, writes the result
    to ``日报_<YYYYMMDD>.xlsx`` in the current directory and prints a
    confirmation line.
    """
    source = pd.read_excel('实时数据.xlsx')
    totals = source.groupby('类别')['数量'].sum().reset_index()

    # The file name carries today's date, so runs on the same day overwrite
    # the same file.
    date_stamp = datetime.now().strftime("%Y%m%d")
    output_file = f'日报_{date_stamp}.xlsx'
    totals.to_excel(output_file, index=False)

    print(f'{datetime.now()}: 日报已生成 - {output_file}')

# Register the same job on three cadences (all three stay active):
# every day at 09:00, every hour, and every Monday at 09:00.
job = generate_daily_report
schedule.every().day.at("09:00").do(job)
schedule.every().hour.do(job)
schedule.every().monday.at("09:00").do(job)

print('定时任务已启动...')

# Poll the scheduler once a minute, forever.
poll_interval_seconds = 60
while True:
    schedule.run_pending()
    time.sleep(poll_interval_seconds)

3.2 使用Windows任务计划程序

创建批处理文件 (run_report.bat):

batch 复制代码
@echo off
rem Run the report generator from its working directory.
cd /d D:\Reports
python generate_report.py
rem No "pause" here: when the task is started unattended there is normally
rem nobody to press a key, so the task would never finish. Hand the script's
rem exit code back to the caller instead.
exit /b %ERRORLEVEL%

Python脚本 (generate_report.py):

python 复制代码
import pandas as pd
from datetime import datetime
import logging

# Send log records to report_log.txt (relative to the current working
# directory), one timestamped line per message.
logging.basicConfig(
    filename='report_log.txt',
    level=logging.INFO,
    format='%(asctime)s - %(message)s'
)

try:
    # Build the report: total sales per department.
    df = pd.read_excel('数据源.xlsx')
    summary = df.groupby('部门')['销售额'].sum().reset_index()

    # The timestamp in the file name keeps each run's output separate.
    output_file = f'报表_{datetime.now().strftime("%Y%m%d_%H%M%S")}.xlsx'
    summary.to_excel(output_file, index=False)

    logging.info(f'报表生成成功: {output_file}')
    print(f'报表生成成功: {output_file}')

except Exception as e:
    # logging.exception records the message at ERROR level together with the
    # traceback, so the log shows where the failure happened.
    logging.exception(f'报表生成失败: {str(e)}')
    print(f'报表生成失败: {str(e)}')
    # Exit with a non-zero status so whatever launched the script can tell
    # that the run failed.
    raise SystemExit(1)

设置Windows任务计划:

  1. 打开"任务计划程序"
  2. 创建基本任务
  3. 设置触发器(每天、每周等)
  4. 操作:启动程序 → 选择 run_report.bat
  5. 完成设置

邮件自动发送

4.1 发送Excel附件

Python操作:

python 复制代码
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import pandas as pd
from datetime import datetime

def send_report_email(excel_file, recipients):
    """Send *excel_file* as an e-mail attachment to *recipients*.

    Args:
        excel_file: Path of the workbook to attach.
        recipients: List of recipient e-mail addresses.

    Raises:
        OSError: If *excel_file* cannot be opened for reading.

    Failures while connecting, authenticating or sending are caught and
    printed rather than raised.
    """
    import os  # local import: only needed for the attachment's display name

    # NOTE(review): placeholder account settings; real credentials should not
    # be hard-coded in source.
    sender_email = "your_email@example.com"
    sender_password = "your_password"
    smtp_server = "smtp.example.com"
    smtp_port = 587

    # Message envelope.
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = ', '.join(recipients)
    msg['Subject'] = f'销售报表 - {datetime.now().strftime("%Y-%m-%d")}'

    # Plain-text body.
    body = """
    您好,

    附件是今日的销售报表,请查收。

    此邮件由系统自动发送,请勿回复。
    """
    msg.attach(MIMEText(body, 'plain', 'utf-8'))

    # Attachment: read the file, then base64-encode the payload.
    with open(excel_file, 'rb') as f:
        part = MIMEBase('application', 'octet-stream')
        part.set_payload(f.read())
    encoders.encode_base64(part)
    # Pass the name as a header parameter so add_header quotes it and encodes
    # non-ASCII names per RFC 2231, and send only the base name rather than
    # the local path.
    part.add_header(
        'Content-Disposition',
        'attachment',
        filename=os.path.basename(excel_file),
    )
    msg.attach(part)

    # Send. The with-block closes the connection even when starttls, login or
    # send_message fails.
    try:
        with smtplib.SMTP(smtp_server, smtp_port) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)
        print(f'邮件发送成功: {excel_file}')
    except Exception as e:
        print(f'邮件发送失败: {str(e)}')

# Example: mail the sales report workbook to two recipients.
recipients = ['user1@example.com', 'user2@example.com']
excel_file = '销售报表.xlsx'
send_report_email(excel_file, recipients)

4.2 发送HTML格式邮件

Python操作:

python 复制代码
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pandas as pd

def send_html_report(df, recipients):
    """Send *df* to *recipients* as an HTML table in the e-mail body.

    Args:
        df: DataFrame to render with ``DataFrame.to_html``.
        recipients: List of recipient e-mail addresses.

    Errors raised while connecting, authenticating or sending are not caught
    here; they propagate to the caller.
    """
    # NOTE(review): placeholder account settings; real credentials should not
    # be hard-coded in source.
    sender_email = "your_email@example.com"
    sender_password = "your_password"

    msg = MIMEMultipart('alternative')
    msg['Subject'] = '销售数据报表'
    msg['From'] = sender_email
    msg['To'] = ', '.join(recipients)

    # Render the DataFrame as an HTML table.
    html_table = df.to_html(index=False, border=1)

    # Doubled braces are literal braces in the CSS; only {html_table} is
    # substituted by the f-string.
    html_body = f"""
    <html>
      <head>
        <style>
          table {{ border-collapse: collapse; width: 100%; }}
          th {{ background-color: #4472C4; color: white; padding: 8px; }}
          td {{ padding: 8px; border: 1px solid #ddd; }}
          tr:nth-child(even) {{ background-color: #f2f2f2; }}
        </style>
      </head>
      <body>
        <h2>销售数据报表</h2>
        {html_table}
      </body>
    </html>
    """

    msg.attach(MIMEText(html_body, 'html', 'utf-8'))

    # The with-block closes the connection even when starttls, login or
    # send_message raises.
    with smtplib.SMTP('smtp.example.com', 587) as server:
        server.starttls()
        server.login(sender_email, sender_password)
        server.send_message(msg)

    print('HTML邮件发送成功')

# Example: load the sales workbook and mail it as an HTML table.
recipients = ['user@example.com']
df = pd.read_excel('销售数据.xlsx')
send_html_report(df, recipients)

数据库集成

5.1 从数据库读取到Excel

Python操作:

python 复制代码
import pandas as pd
import sqlite3

# SQLite example: export the rows selected by the query to an Excel workbook.
conn = sqlite3.connect('database.db')
try:
    query = "SELECT * FROM sales WHERE date >= '2024-01-01'"
    df = pd.read_sql_query(query, conn)
finally:
    # Close the connection even when the query fails.
    conn.close()

df.to_excel('数据库导出.xlsx', index=False)
print(f'已导出 {len(df)} 条记录')

MySQL示例:

python 复制代码
import pandas as pd
import pymysql

# NOTE(review): placeholder connection settings; real credentials should not
# be hard-coded in source.
conn = pymysql.connect(
    host='localhost',
    user='username',
    password='password',
    database='mydb'
)

try:
    query = "SELECT * FROM sales"
    df = pd.read_sql(query, conn)
finally:
    # Close the connection even when the query fails.
    conn.close()

df.to_excel('MySQL导出.xlsx', index=False)

5.2 从Excel导入到数据库

Python操作:

python 复制代码
import pandas as pd
import sqlite3

# Read the Excel workbook.
df = pd.read_excel('数据.xlsx')

# Load it into SQLite. if_exists='replace' drops an existing "sales" table
# before writing the new rows.
conn = sqlite3.connect('database.db')
try:
    df.to_sql('sales', conn, if_exists='replace', index=False)
finally:
    # Close the connection even when the import fails.
    conn.close()

print(f'已导入 {len(df)} 条记录到数据库')

Web数据抓取

6.1 抓取网页表格

Python操作:

python 复制代码
import pandas as pd
import requests
from bs4 import BeautifulSoup

from io import StringIO  # stdlib; wraps the table markup for pd.read_html

url = 'https://example.com/data'
# Without a timeout requests.get can wait indefinitely on a stalled server.
response = requests.get(url, timeout=30)
# Fail on 4xx/5xx responses instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Take the first <table> on the page.
table = soup.find('table')
if table is None:
    raise ValueError(f'No <table> element found at {url}')

# Convert to a DataFrame. The markup is wrapped in StringIO because passing
# literal HTML text to pd.read_html is deprecated in recent pandas versions.
df = pd.read_html(StringIO(str(table)))[0]

# Save to Excel.
df.to_excel('网页数据.xlsx', index=False)
print('网页数据已保存')

6.2 API数据获取

Python操作:

python 复制代码
import pandas as pd
import requests

# Call the API.
url = 'https://api.example.com/data'
headers = {'Authorization': 'Bearer YOUR_TOKEN'}
# Without a timeout requests.get can wait indefinitely on a stalled server.
response = requests.get(url, headers=headers, timeout=30)
# Fail on 4xx/5xx responses instead of trying to parse an error body.
response.raise_for_status()

# Parse the JSON body; the records are read from its "results" key.
data = response.json()
df = pd.DataFrame(data['results'])

# Save to Excel.
df.to_excel('API数据.xlsx', index=False)
print(f'已获取 {len(df)} 条记录')

完整自动化示例

综合案例:每日销售报表自动化

Python操作:

python 复制代码
import pandas as pd
import schedule
import time
from datetime import datetime
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders

def generate_and_send_report():
    """Build today's per-product sales summary, format it and hand it to send_email.

    Reads 销售数据.xlsx, keeps the rows whose 日期 is today, sums 销售额 and 数量
    per 产品, writes ``日报_<YYYYMMDD>.xlsx``, colours the header row and calls
    ``send_email`` with the file name. Any exception is caught and printed
    with a timestamp instead of being raised.
    """
    try:
        # 1. Load the source data.
        sales = pd.read_excel('销售数据.xlsx')

        # 2. Keep only today's rows and aggregate them per product.
        sales['日期'] = pd.to_datetime(sales['日期']).dt.date
        today = datetime.now().date()
        is_today = sales['日期'] == today
        summary = (
            sales[is_today]
            .groupby('产品')
            .agg({'销售额': 'sum', '数量': 'sum'})
            .reset_index()
        )

        # 3. Write the summary workbook.
        date_stamp = datetime.now().strftime("%Y%m%d")
        output_file = f'日报_{date_stamp}.xlsx'
        summary.to_excel(output_file, index=False)

        # 4. Style the header row: white bold text on a blue fill.
        workbook = load_workbook(output_file)
        header_cells = workbook.active[1]
        header_fill = PatternFill(start_color='4472C4', fill_type='solid')
        header_font = Font(color='FFFFFF', bold=True)
        for header_cell in header_cells:
            header_cell.fill = header_fill
            header_cell.font = header_font
        workbook.save(output_file)

        # 5. Pass the finished file to the mail step.
        send_email(output_file)

        print(f'{datetime.now()}: 报表生成并发送成功')

    except Exception as e:
        print(f'{datetime.now()}: 错误 - {str(e)}')

def send_email(file_path):
    """Placeholder for the e-mail step; currently does nothing with *file_path*."""
    # The sending logic is not implemented in this example.
    return None

# Run the report job every day at 09:00.
job = generate_and_send_report
schedule.every().day.at("09:00").do(job)

print('自动化任务已启动...')

# Poll the scheduler once a minute, forever.
poll_interval_seconds = 60
while True:
    schedule.run_pending()
    time.sleep(poll_interval_seconds)
相关推荐
梁萌2 小时前
Jenkins流水线配置的两种方式
运维·jenkins·jenkinsfile·流水线配置
终端行者2 小时前
Nginx 反向代理与负载均衡配置 反向代理与负载均衡配置参数
运维·nginx·负载均衡
oMcLin2 小时前
CentOS 7.6 磁盘空间不足导致服务崩溃:如何有效清理日志文件和临时文件
linux·运维·centos
筑梦之路2 小时前
Jenkins 构建部署多模块Java应用流水线参考——筑梦之路
java·运维·jenkins
金牌归来发现妻女流落街头2 小时前
【线程池 + Socket 服务器】
java·运维·服务器·多线程
大模型铲屎官2 小时前
【操作系统-Day 47】揭秘Linux文件系统基石:图解索引分配(inode)与多级索引
linux·运维·服务器·人工智能·python·操作系统·计算机组成原理
乾元2 小时前
Network-as-Code:把 HCIE / CCIE 实验脚本转为企业级 CI 工程化流程
运维·网络·人工智能·安全·web安全·ai·架构
jiayong232 小时前
Excel高级功能详细文档03
excel
拾光Ծ2 小时前
Linux 进程控制:进程终止与等待・waitpid 选项参数与状态解析(告别僵尸进程)
linux·运维·服务器·进程控制