import os
import re
from pathlib import Path
from datetime import datetime
import baostock as bs
import pandas as pd
# Maps each supported data-type key to the Baostock query function used to
# fetch it.
# NOTE(review): per the Baostock documentation the last two entries
# (express report / forecast) take ``start_date``/``end_date`` rather than
# ``year``/``quarter`` -- confirm callers pass the right keyword arguments.
DATA_TYPE_MAP = {
    'profit': bs.query_profit_data,  # profitability
    'operation': bs.query_operation_data,  # operating capability
    'growth': bs.query_growth_data,  # growth capability
    'balance': bs.query_balance_data,  # solvency
    'cashflow': bs.query_cash_flow_data,  # cash flow
    'dupont': bs.query_dupont_data,  # DuPont index
    'performance_express': bs.query_performance_express_report,  # performance express report
    'forecast': bs.query_forecast_report,  # performance forecast
}
# Chinese display name of each data type (used as the Excel sheet name).
DATA_TYPE_NAMES = {
    'profit': '盈利能力',
    'operation': '营运能力',
    'growth': '成长能力',
    'balance': '偿债能力',
    'cashflow': '现金流量',
    'dupont': '杜邦指数',
    'performance_express': '业绩快报',
    'forecast': '业绩预告',
}
def get_user_input():
    """Interactively ask the user which reporting periods to download.

    Prints the partition menu, then reads a partition number (1-6) from
    standard input, re-prompting until it is valid.  For partition 6 the user
    is additionally asked for comma-separated years and quarters; both lists
    must be non-empty and every quarter must be 1, 2, 3 or 4.

    Returns:
        tuple: ``(partition, years, quarters)``.  ``years`` and ``quarters``
        are lists of ints for partition 6 and ``None`` for every other
        partition.
    """
    print("请选择数据下载分区(1-6):")
    print("1 - 最新一期(最近一个完整季度)")
    print("2 - 最近一年(最近4个季度)")
    print("3 - 3年期间(最近12个季度)")
    print("4 - 5年期(最近20个季度)")
    print("5 - 自成立至今(从2000年至今)")
    print("6 - 自由选择(手动输入年份和季度)")

    while True:
        try:
            partition = int(input("请输入分区编号(1-6):").strip())
        except ValueError:
            print("输入无效,请输入数字。")
            continue
        if partition in (1, 2, 3, 4, 5, 6):
            break
        print("输入无效,请输入1-6之间的数字。")

    years, quarters = None, None
    if partition != 6:
        return partition, years, quarters

    # Partition 6: the user enters years and quarters by hand.
    while True:
        year_input = input("请输入年份(多个用逗号分隔,例如 2022,2023):").strip()
        try:
            years = [int(y.strip()) for y in year_input.split(',') if y.strip()]
        except ValueError:
            print("年份输入格式错误,请重新输入。")
            continue
        if years:
            break
        print("年份不能为空。")

    while True:
        quarter_input = input("请输入季度(多个用逗号分隔,例如 1,2,3,4):").strip()
        try:
            quarters = [int(q.strip()) for q in quarter_input.split(',') if q.strip()]
        except ValueError:
            print("季度输入格式错误,请重新输入。")
            continue
        if not quarters:
            # all() is vacuously true for an empty list, so reject it
            # explicitly; otherwise nothing would be downloaded.
            print("季度不能为空。")
        elif all(q in (1, 2, 3, 4) for q in quarters):
            break
        else:
            print("季度只能为1、2、3、4,请重新输入。")

    return partition, years, quarters
def get_year_quarter_list(partition, years=None, quarters=None, now=None):
    """Build the list of ``(year, quarter)`` pairs to download for a partition.

    Args:
        partition: Partition number.  1 = latest complete quarter, 2/3/4 =
            the most recent 4/12/20 complete quarters, 5 = every quarter from
            2000 up to the latest complete quarter, 6 = the cross product of
            ``years`` and ``quarters``.
        years: Years to use for partition 6 (ignored otherwise).
        quarters: Quarters to use for partition 6 (ignored otherwise).
        now: Reference ``datetime`` used to determine the latest complete
            quarter; defaults to ``datetime.now()``.

    Returns:
        list[tuple[int, int]]: ``(year, quarter)`` pairs.  Partitions 1-4 are
        ordered newest first, partition 5 oldest first, and partition 6 in
        the order given by ``years`` then ``quarters``.

    Raises:
        ValueError: If ``partition`` is not supported, or if partition 6 is
            requested without both ``years`` and ``quarters``.
    """
    if now is None:
        now = datetime.now()

    # Latest complete quarter: months 1-3 -> Q4 of the previous year,
    # 4-6 -> Q1, 7-9 -> Q2, 10-12 -> Q3 of the current year.
    latest_year = now.year
    latest_quarter = (now.month - 1) // 3
    if latest_quarter == 0:
        latest_quarter = 4
        latest_year -= 1

    # Number of trailing quarters returned by the "most recent N" partitions.
    trailing_counts = {1: 1, 2: 4, 3: 12, 4: 20}
    if partition in trailing_counts:
        quarters_needed = []
        y, q = latest_year, latest_quarter
        for _ in range(trailing_counts[partition]):
            quarters_needed.append((y, q))
            q -= 1
            if q == 0:
                y, q = y - 1, 4
        return quarters_needed

    if partition == 5:
        start_year = 2000  # adjust the first year here if needed
        # Stop at latest_year (not the calendar year): in January-March the
        # latest complete quarter belongs to the previous year, and no
        # quarter of the current year is complete yet.
        return [
            (year, quarter)
            for year in range(start_year, latest_year + 1)
            for quarter in (1, 2, 3, 4)
            if not (year == latest_year and quarter > latest_quarter)
        ]

    if partition == 6:
        if years is None or quarters is None:
            raise ValueError("分区6必须提供years和quarters参数")
        return [(y, q) for y in years for q in quarters]

    raise ValueError(f"不支持的分区编号: {partition}")
# Data types whose Baostock query takes a start_date/end_date window instead
# of year/quarter (see the Baostock API documentation).
_DATE_RANGE_DATA_TYPES = frozenset({'performance_express', 'forecast'})

# First and last calendar day (MM-DD) of each quarter.
_QUARTER_BOUNDS = {
    1: ("01-01", "03-31"),
    2: ("04-01", "06-30"),
    3: ("07-01", "09-30"),
    4: ("10-01", "12-31"),
}


def _to_baostock_code(code):
    """Return *code* in Baostock form (``sh.``/``sz.`` prefix), or None if unrecognised."""
    if '.' in code:
        # Already carries an exchange prefix; use it unchanged.
        return code
    if code.startswith('6'):
        return f"sh.{code}"
    if code.startswith(('0', '3')):
        return f"sz.{code}"
    return None


def _query_quarter(data_type, query_func, bs_code, year, quarter):
    """Run one Baostock query and return its rows as a DataFrame, or None if empty/failed."""
    if data_type in _DATE_RANGE_DATA_TYPES:
        # These queries filter by a date window rather than year/quarter;
        # calling them with year/quarter raises TypeError.
        # NOTE(review): the window used is the calendar quarter itself, so for
        # these two types the year/quarter labels describe when the report
        # falls in that window, not necessarily the period it covers -- confirm.
        first_day, last_day = _QUARTER_BOUNDS[quarter]
        rs = query_func(
            code=bs_code,
            start_date=f"{year}-{first_day}",
            end_date=f"{year}-{last_day}",
        )
    else:
        rs = query_func(code=bs_code, year=year, quarter=quarter)

    if rs.error_code != '0':
        print(f" 查询失败: {rs.error_msg}")
        return None

    rows = []
    while rs.next():
        rows.append(rs.get_row_data())
    if not rows:
        print(" 无数据")
        return None
    return pd.DataFrame(rows, columns=rs.fields)


def _write_stock_workbook(finance_dir, code, name, frames_by_type):
    """Write one stock's workbook (one sheet per data type); return its path, or None if no data."""
    if not any(frames_by_type.values()):
        return None

    # Year span of the downloaded data, used in the file name.
    year_values = set()
    for frames in frames_by_type.values():
        for frame in frames:
            year_values.update(frame['年份'].unique())
    first_year, last_year = min(year_values), max(year_values)
    if first_year == last_year:
        time_range = str(first_year)
    else:
        time_range = f"{first_year}-{last_year}"

    # Strip characters that are illegal in file names, plus spaces.
    safe_name = re.sub(r'[\\/*?:"<>|]', '', name).replace(' ', '')
    # File name format: 财务报表-<code>-<name>-<year range>.xlsx
    file_path = finance_dir / f"财务报表-{code}-{safe_name}-{time_range}.xlsx"

    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
        for data_type, frames in frames_by_type.items():
            if not frames:
                continue
            # Merge all quarters of this type, oldest first.
            combined_df = (
                pd.concat(frames, ignore_index=True)
                .sort_values(['年份', '季度'], ascending=[True, True])
                .reset_index(drop=True)
            )
            sheet_name = DATA_TYPE_NAMES.get(data_type, data_type)
            # Excel limits sheet names to 31 characters.
            combined_df.to_excel(writer, sheet_name=sheet_name[:31], index=False)
            print(f" 写入Sheet: {sheet_name},共 {len(combined_df)} 行")
    return file_path


def download_financial_data(
    root_path,
    excel_file="财报批量下载列表.xls",
    data_types=None,
    partition=6,
    years=None,
    quarters=None
):
    """Download financial data in bulk, writing one multi-sheet workbook per stock.

    The stock list is read from the ``待下载`` sheet of ``excel_file`` (inside
    ``root_path``), which must have a ``证券代码`` column and may have a
    ``证券名称`` column.  For every stock, data type and ``(year, quarter)``
    the matching Baostock query is run; failures of a single query are printed
    and skipped.  Results are written to ``<root_path>/财务报表/``.

    Args:
        root_path: Directory containing ``excel_file``; output goes below it.
        excel_file: File name of the ``.xls`` stock list.
        data_types: Iterable of keys of ``DATA_TYPE_MAP``; ``None`` means all.
        partition: Period selection passed to ``get_year_quarter_list``.
        years: Years for partition 6.
        quarters: Quarters for partition 6.

    Returns:
        None.  Also returns early (after printing a message) when ``xlrd`` is
        not installed or the Baostock login fails.

    Raises:
        ValueError: Unsupported data type, or the sheet lacks ``证券代码``.
        FileNotFoundError: ``excel_file`` does not exist under ``root_path``.
        RuntimeError: The stock list could not be read.
    """
    # Materialise so that a one-shot iterable can be traversed repeatedly.
    data_types = list(DATA_TYPE_MAP) if data_types is None else list(data_types)
    for dt in data_types:
        if dt not in DATA_TYPE_MAP:
            raise ValueError(f"不支持的数据类型: {dt},可选: {list(DATA_TYPE_MAP.keys())}")

    yq_list = get_year_quarter_list(partition, years, quarters)
    print(f"根据分区 {partition} 生成 {len(yq_list)} 个季度需要下载")

    root = Path(root_path)
    excel_path = root / excel_file
    # Output directory holding one workbook per stock.
    finance_dir = root / "财务报表"
    finance_dir.mkdir(parents=True, exist_ok=True)

    if not excel_path.exists():
        raise FileNotFoundError(f"Excel文件不存在: {excel_path}")
    try:
        stock_list = pd.read_excel(
            excel_path,
            sheet_name='待下载',
            dtype={'证券代码': str},
            engine='xlrd'
        )
    except ImportError:
        print("错误:未安装 xlrd 库,无法读取 .xls 文件。请运行:pip install xlrd")
        return
    except Exception as e:
        raise RuntimeError(f"读取Excel失败: {e}") from e

    if '证券代码' not in stock_list.columns:
        raise ValueError("'待下载'工作表必须包含'证券代码'列")
    # fillna first: astype(str) would turn blank cells into the text 'nan'.
    stock_list['证券代码'] = stock_list['证券代码'].fillna('').astype(str).str.strip()
    if '证券名称' in stock_list.columns:
        stock_list['证券名称'] = stock_list['证券名称'].fillna('').astype(str).str.strip()
    else:
        stock_list['证券名称'] = ''
    # Ignore rows without a stock code.
    stock_list = stock_list[stock_list['证券代码'] != '']

    lg = bs.login()
    if lg.error_code != '0':
        print(f"登录失败: {lg.error_msg}")
        return
    print("登录成功")

    # {(code, name): {data_type: [DataFrame per quarter, ...]}}
    stock_data = {}
    try:
        for _, row in stock_list.iterrows():
            code = row['证券代码']
            name = row['证券名称']
            bs_code = _to_baostock_code(code)
            if bs_code is None:
                print(f"无法识别的代码格式: {code},跳过")
                continue

            frames_by_type = {dt: [] for dt in data_types}
            stock_data[(code, name)] = frames_by_type

            for data_type in data_types:
                query_func = DATA_TYPE_MAP[data_type]
                type_name_cn = DATA_TYPE_NAMES.get(data_type, data_type)
                for year, quarter in yq_list:
                    print(f"正在下载: {code} {name} {year}Q{quarter} {type_name_cn}")
                    try:
                        df_data = _query_quarter(
                            data_type, query_func, bs_code, year, quarter
                        )
                        if df_data is None:
                            continue
                        # Identification columns used for sorting and naming.
                        df_data.insert(0, '股票代码', code)
                        df_data.insert(1, '股票名称', name)
                        df_data.insert(2, '年份', year)
                        df_data.insert(3, '季度', quarter)
                        frames_by_type[data_type].append(df_data)
                        print(f" 已获取 {len(df_data)} 条记录")
                    except Exception as e:
                        # Best effort: one failed query must not abort the batch.
                        print(f" 下载异常: {e}")
    finally:
        # Always release the Baostock session, even on an unexpected error.
        bs.logout()

    print("\n开始生成Excel文件...")
    for (code, name), frames_by_type in stock_data.items():
        file_path = _write_stock_workbook(finance_dir, code, name, frames_by_type)
        if file_path is None:
            print(f"股票 {code} {name} 无任何数据,跳过")
            continue
        print(f"已生成: {file_path}")
    print("全部完成!")
# Script entry point.
if __name__ == "__main__":
    # Use the directory containing this script as the root directory.
    root_directory = Path(__file__).parent
    # Ask the user interactively which periods to download.
    partition, years, quarters = get_user_input()
    # The data types could be made selectable here; for simplicity all
    # supported types are downloaded.
    data_types = list(DATA_TYPE_MAP.keys())
    download_financial_data(
        root_path=root_directory,
        excel_file="财报批量下载列表.xls",
        data_types=data_types,
        partition=partition,
        years=years,
        quarters=quarters
    )