前言
量化多因子研究需要从 tushare 下载数据。如果每次都重新获取、重新整理数据格式,会严重干扰我们专心做研究。更好的做法是直接构建一个本地数据库:需要数据时执行一条 SQL 查询,把结果拼好返回,再在 Python 中读取。这样既简化了繁琐的数据获取流程,又可以定制我们需要的数据格式,非常好用~
数据库设计
目前,比较简单的做法就是把tushare中的数据直接存下来作为一张表,表名与接口名一致,这样也不需要自己写文档,然后需要的数据格式让GPT写sql即可。具体来说我整理了一下表,足以应付常见的投研情况,如需要更高级的数据,自己再设计相应的表即可。目前的数据表如下:
- 复权因子 adj_factor: https://tushare.pro/document/2?doc_id=28
- 历史日线 daily: https://tushare.pro/document/2?doc_id=27
- 每日行情 daily_basic: https://tushare.pro/document/2?doc_id=32
- 财务数据 fina_indicator: https://tushare.pro/document/2?doc_id=79
- 股票基础数据 stock_basic:https://tushare.pro/document/2?doc_id=25
- 申万行业数据 index_member_all(本地表名为 industry_data):https://tushare.pro/document/2?doc_id=335
- 现金流数据 cashflow:TODO(待补充)
表设计
表设计的思路是把每个常用的 tushare api 作为一张表,保存其返回的全部数据。对于行情类的表,我们通常将交易日期 trade_date 和 ts_code 作为联合主键,唯一确定一条数据。另外我们加入了两个额外的列,即更新时间和数据源。数据源默认为 tushare,方便以后扩展其他数据源。
表设计的代码参考如下:
- 复权因子 adj_factor
sql
-- Adjustment factor per stock and trading day (Tushare adj_factor API).
-- adj_factor is stored as double: single-precision float keeps only about
-- 7 significant digits, which is lossy when the factor is multiplied with
-- prices to build adjusted price series.
create table adj_factor
(
    ts_code     varchar(10)                        not null comment '股票代码',
    trade_date  date                               not null comment '交易日期',
    adj_factor  double                             null comment '复权因子',
    source      varchar(60)                        null comment '数据源',
    update_time datetime default CURRENT_TIMESTAMP not null comment '更新时间',
    primary key (ts_code, trade_date)
)
    comment 'Tushare复权因子表' engine = InnoDB collate = utf8mb4_general_ci;
- 历史日线 daily
sql
-- Daily bar data per stock and trading day (Tushare daily API).
-- Numeric columns use double, matching daily_basic: single-precision float
-- keeps only about 7 significant digits, which truncates large vol/amount
-- values and makes close differ from daily_basic.close for the same row.
create table daily
(
    ts_code     varchar(10)                        not null comment '股票代码',
    trade_date  date                               not null comment '交易日期',
    open        double                             null comment '开盘价',
    high        double                             null comment '最高价',
    low         double                             null comment '最低价',
    close       double                             null comment '收盘价',
    pre_close   double                             null comment '昨收价(前复权)',
    `change`    double                             null comment '涨跌额',
    pct_chg     double                             null comment '涨跌幅(基于除权后的昨收计算)',
    vol         double                             null comment '成交量(手)',
    amount      double                             null comment '成交额(千元)',
    source      varchar(20)                        null comment '数据源',
    update_time datetime default CURRENT_TIMESTAMP not null comment '更新时间',
    primary key (ts_code, trade_date)
)
    comment 'Tushare日行情数据表' engine = InnoDB collate = utf8mb4_general_ci;
- 每日行情 daily_basic
sql
-- Daily valuation and share-capital indicators (Tushare daily_basic API),
-- one row per (trade_date, ts_code).
create table daily_basic
(
    -- key columns
    ts_code         char(9)     not null comment 'TS股票代码',
    trade_date      date        not null comment '交易日期 (YYYYMMDD)',
    -- price and turnover
    close           double      null comment '当日收盘价',
    turnover_rate   double      null comment '换手率(%)',
    turnover_rate_f double      null comment '换手率(自由流通股)',
    volume_ratio    double      null comment '量比',
    -- valuation ratios
    pe              double      null comment '市盈率(总市值/净利润)',
    pe_ttm          double      null comment '市盈率(TTM)',
    pb              double      null comment '市净率(总市值/净资产)',
    ps              double      null comment '市销率',
    ps_ttm          double      null comment '市销率(TTM)',
    dv_ratio        double      null comment '股息率(%)',
    dv_ttm          double      null comment '股息率(TTM)(%)',
    -- share capital and market value
    total_share     double      null comment '总股本(万股)',
    float_share     double      null comment '流通股本(万股)',
    free_share      double      null comment '自由流通股本(万股)',
    total_mv        double      null comment '总市值(万元)',
    circ_mv         double      null comment '流通市值(万元)',
    -- bookkeeping columns
    source          varchar(20) null default 'tushare' comment '数据来源',
    update_time     datetime    null default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP comment '更新时间',
    primary key (trade_date, ts_code)
)
    engine = InnoDB
    collate = utf8mb4_general_ci
    comment = 'Tushare每日基础指标(daily_basic)';
- 财务指标 fina_indicator
sql
-- Financial indicators per stock and reporting period (Tushare fina_indicator API).
-- All numeric indicators are stored as double: several columns hold absolute
-- amounts (net profit, EBIT, debt, ...) whose magnitude exceeds the ~7
-- significant digits a single-precision float can represent.
create table fina_indicator
(
    ts_code varchar(12) not null comment 'TS代码',
    ann_date date null comment '公告日期',
    end_date date not null comment '报告期',
    eps double null comment '基本每股收益',
    dt_eps double null comment '稀释每股收益',
    total_revenue_ps double null comment '每股营业总收入',
    revenue_ps double null comment '每股营业收入',
    capital_rese_ps double null comment '每股资本公积',
    surplus_rese_ps double null comment '每股盈余公积',
    undist_profit_ps double null comment '每股未分配利润',
    extra_item double null comment '非经常性损益',
    profit_dedt double null comment '扣除非经常性损益后的净利润(扣非净利润)',
    gross_margin double null comment '毛利',
    current_ratio double null comment '流动比率',
    quick_ratio double null comment '速动比率',
    cash_ratio double null comment '保守速动比率',
    invturn_days double null comment '存货周转天数',
    arturn_days double null comment '应收账款周转天数',
    inv_turn double null comment '存货周转率',
    ar_turn double null comment '应收账款周转率',
    ca_turn double null comment '流动资产周转率',
    fa_turn double null comment '固定资产周转率',
    assets_turn double null comment '总资产周转率',
    op_income double null comment '经营活动净收益',
    valuechange_income double null comment '价值变动净收益',
    interst_income double null comment '利息费用',
    daa double null comment '折旧与摊销',
    ebit double null comment '息税前利润',
    ebitda double null comment '息税折旧摊销前利润',
    fcff double null comment '企业自由现金流量',
    fcfe double null comment '股权自由现金流量',
    current_exint double null comment '无息流动负债',
    noncurrent_exint double null comment '无息非流动负债',
    interestdebt double null comment '带息债务',
    netdebt double null comment '净债务',
    tangible_asset double null comment '有形资产',
    working_capital double null comment '营运资金',
    networking_capital double null comment '营运流动资本',
    invest_capital double null comment '全部投入资本',
    retained_earnings double null comment '留存收益',
    diluted2_eps double null comment '期末摊薄每股收益',
    bps double null comment '每股净资产',
    ocfps double null comment '每股经营活动产生的现金流量净额',
    retainedps double null comment '每股留存收益',
    cfps double null comment '每股现金流量净额',
    ebit_ps double null comment '每股息税前利润',
    fcff_ps double null comment '每股企业自由现金流量',
    fcfe_ps double null comment '每股股东自由现金流量',
    netprofit_margin double null comment '销售净利率',
    grossprofit_margin double null comment '销售毛利率',
    cogs_of_sales double null comment '销售成本率',
    expense_of_sales double null comment '销售期间费用率',
    profit_to_gr double null comment '净利润/营业总收入',
    saleexp_to_gr double null comment '销售费用/营业总收入',
    adminexp_of_gr double null comment '管理费用/营业总收入',
    finaexp_of_gr double null comment '财务费用/营业总收入',
    impai_ttm double null comment '资产减值损失/营业总收入',
    gc_of_gr double null comment '营业总成本/营业总收入',
    op_of_gr double null comment '营业利润/营业总收入',
    ebit_of_gr double null comment '息税前利润/营业总收入',
    roe double null comment '净资产收益率',
    roe_waa double null comment '加权平均净资产收益率',
    roe_dt double null comment '净资产收益率(扣除非经常损益)',
    roa double null comment '总资产报酬率',
    npta double null comment '总资产净利润',
    roic double null comment '投入资本回报率',
    roe_yearly double null comment '年化净资产收益率',
    roa2_yearly double null comment '年化总资产报酬率',
    roe_avg double null comment '平均净资产收益率(增发条件)',
    opincome_of_ebt double null comment '经营活动净收益/利润总额',
    investincome_of_ebt double null comment '价值变动净收益/利润总额',
    n_op_profit_of_ebt double null comment '营业外收支净额/利润总额',
    tax_to_ebt double null comment '所得税/利润总额',
    dtprofit_to_profit double null comment '扣除非经常损益后的净利润/净利润',
    salescash_to_or double null comment '销售商品提供劳务收到的现金/营业收入',
    ocf_to_or double null comment '经营活动产生的现金流量净额/营业收入',
    ocf_to_opincome double null comment '经营活动产生的现金流量净额/经营活动净收益',
    capitalized_to_da double null comment '资本支出/折旧和摊销',
    debt_to_assets double null comment '资产负债率',
    assets_to_eqt double null comment '权益乘数',
    dp_assets_to_eqt double null comment '权益乘数(杜邦分析)',
    ca_to_assets double null comment '流动资产/总资产',
    nca_to_assets double null comment '非流动资产/总资产',
    tbassets_to_totalassets double null comment '有形资产/总资产',
    int_to_talcap double null comment '带息债务/全部投入资本',
    eqt_to_talcapital double null comment '归属于母公司的股东权益/全部投入资本',
    currentdebt_to_debt double null comment '流动负债/负债合计',
    longdeb_to_debt double null comment '非流动负债/负债合计',
    ocf_to_shortdebt double null comment '经营活动产生的现金流量净额/流动负债',
    debt_to_eqt double null comment '产权比率',
    eqt_to_debt double null comment '归属于母公司的股东权益/负债合计',
    eqt_to_interestdebt double null comment '归属于母公司的股东权益/带息债务',
    tangibleasset_to_debt double null comment '有形资产/负债合计',
    tangasset_to_intdebt double null comment '有形资产/带息债务',
    tangibleasset_to_netdebt double null comment '有形资产/净债务',
    ocf_to_debt double null comment '经营活动产生的现金流量净额/负债合计',
    ocf_to_interestdebt double null comment '经营活动产生的现金流量净额/带息债务',
    ocf_to_netdebt double null comment '经营活动产生的现金流量净额/净债务',
    ebit_to_interest double null comment '已获利息倍数(EBIT/利息费用)',
    longdebt_to_workingcapital double null comment '长期债务与营运资金比率',
    ebitda_to_debt double null comment '息税折旧摊销前利润/负债合计',
    turn_days double null comment '营业周期',
    roa_yearly double null comment '年化总资产净利率',
    roa_dp double null comment '总资产净利率(杜邦分析)',
    fixed_assets double null comment '固定资产合计',
    profit_prefin_exp double null comment '扣除财务费用前营业利润',
    non_op_profit double null comment '非营业利润',
    op_to_ebt double null comment '营业利润/利润总额',
    nop_to_ebt double null comment '非营业利润/利润总额',
    ocf_to_profit double null comment '经营活动产生的现金流量净额/营业利润',
    cash_to_liqdebt double null comment '货币资金/流动负债',
    cash_to_liqdebt_withinterest double null comment '货币资金/带息流动负债',
    op_to_liqdebt double null comment '营业利润/流动负债',
    op_to_debt double null comment '营业利润/负债合计',
    roic_yearly double null comment '年化投入资本回报率',
    total_fa_trun double null comment '固定资产合计周转率',
    profit_to_op double null comment '利润总额/营业收入',
    q_opincome double null comment '经营活动单季度净收益',
    q_investincome double null comment '价值变动单季度净收益',
    q_dtprofit double null comment '扣除非经常损益后的单季度净利润',
    q_eps double null comment '每股收益(单季度)',
    q_netprofit_margin double null comment '销售净利率(单季度)',
    q_gsprofit_margin double null comment '销售毛利率(单季度)',
    q_exp_to_sales double null comment '销售期间费用率(单季度)',
    q_profit_to_gr double null comment '净利润/营业总收入(单季度)',
    q_saleexp_to_gr double null comment '销售费用/营业总收入 (单季度)',
    q_adminexp_to_gr double null comment '管理费用/营业总收入 (单季度)',
    q_finaexp_to_gr double null comment '财务费用/营业总收入 (单季度)',
    q_impair_to_gr_ttm double null comment '资产减值损失/营业总收入(单季度)',
    q_gc_to_gr double null comment '营业总成本/营业总收入 (单季度)',
    q_op_to_gr double null comment '营业利润/营业总收入(单季度)',
    q_roe double null comment '净资产收益率(单季度)',
    q_dt_roe double null comment '净资产单季度收益率(扣除非经常损益)',
    q_npta double null comment '总资产净利润(单季度)',
    q_opincome_to_ebt double null comment '经营活动净收益/利润总额(单季度)',
    q_investincome_to_ebt double null comment '价值变动净收益/利润总额(单季度)',
    q_dtprofit_to_profit double null comment '扣除非经常损益后的净利润/净利润(单季度)',
    q_salescash_to_or double null comment '销售商品提供劳务收到的现金/营业收入(单季度)',
    q_ocf_to_sales double null comment '经营活动产生的现金流量净额/营业收入(单季度)',
    q_ocf_to_or double null comment '经营活动产生的现金流量净额/经营活动净收益(单季度)',
    basic_eps_yoy double null comment '基本每股收益同比增长率(%)',
    dt_eps_yoy double null comment '稀释每股收益同比增长率(%)',
    cfps_yoy double null comment '每股经营活动产生的现金流量净额同比增长率(%)',
    op_yoy double null comment '营业利润同比增长率(%)',
    ebt_yoy double null comment '利润总额同比增长率(%)',
    netprofit_yoy double null comment '归属母公司股东的净利润同比增长率(%)',
    dt_netprofit_yoy double null comment '归属母公司股东的净利润-扣除非经常损益同比增长率(%)',
    ocf_yoy double null comment '经营活动产生的现金流量净额同比增长率(%)',
    roe_yoy double null comment '净资产收益率(摊薄)同比增长率(%)',
    bps_yoy double null comment '每股净资产相对年初增长率(%)',
    assets_yoy double null comment '资产总计相对年初增长率(%)',
    eqt_yoy double null comment '归属母公司的股东权益相对年初增长率(%)',
    tr_yoy double null comment '营业总收入同比增长率(%)',
    or_yoy double null comment '营业收入同比增长率(%)',
    q_gr_yoy double null comment '营业总收入同比增长率(%)(单季度)',
    q_gr_qoq double null comment '营业总收入环比增长率(%)(单季度)',
    q_sales_yoy double null comment '营业收入同比增长率(%)(单季度)',
    q_sales_qoq double null comment '营业收入环比增长率(%)(单季度)',
    q_op_yoy double null comment '营业利润同比增长率(%)(单季度)',
    q_op_qoq double null comment '营业利润环比增长率(%)(单季度)',
    q_profit_yoy double null comment '净利润同比增长率(%)(单季度)',
    q_profit_qoq double null comment '净利润环比增长率(%)(单季度)',
    q_netprofit_yoy double null comment '归属母公司股东的净利润同比增长率(%)(单季度)',
    q_netprofit_qoq double null comment '归属母公司股东的净利润环比增长率(%)(单季度)',
    equity_yoy double null comment '净资产同比增长率',
    rd_exp double null comment '研发费用',
    update_flag varchar(10) null comment '更新标识',
    source varchar(32) default 'tushare' null comment '数据来源',
    update_time datetime default CURRENT_TIMESTAMP null comment '更新时间',
    primary key (ts_code, end_date)
)
    comment '财务指标表 (Tushare fina_indicator)' engine = InnoDB collate = utf8mb4_general_ci;
-- Secondary index for lookups by announcement date.
create index idx_ann_date on fina_indicator (ann_date);
- 申万行业数据 industry_data
sql
-- Industry classification per stock, keyed by ts_code alone
-- (one row per stock).
create table industry_data
(
    ts_code     char(9)     not null comment '股票代码',
    name        varchar(50) null comment '股票名称',
    -- level-1 to level-3 industry code/name pairs
    l1_code     varchar(15) null comment '一级行业代码',
    l1_name     varchar(50) null comment '一级行业名称',
    l2_code     varchar(15) null,
    l2_name     varchar(50) null,
    l3_code     varchar(15) null,
    l3_name     varchar(50) null,
    -- membership window
    in_date     date        null comment '纳入日期',
    out_date    date        null comment '移出日期',
    is_new      char        null default 'Y' comment '是否当前有效(Y/N)',
    -- bookkeeping columns
    source      varchar(20) null default 'tushare' comment '数据来源',
    update_time datetime    null default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
    primary key (ts_code)
)
    engine = InnoDB
    collate = utf8mb4_general_ci
    comment = '申万一级行业分类';
- 股票基本数据 stock_basic
sql
-- Static per-stock reference data (Tushare stock_basic API), one row per ts_code.
create table stock_basic
(
    ts_code      varchar(10)  not null comment 'TS代码',
    symbol       varchar(10)  null comment '股票代码',
    -- naming
    name         varchar(50)  null comment '股票名称',
    area         varchar(50)  null comment '地域',
    industry     varchar(50)  null comment '所属行业',
    fullname     varchar(100) null comment '股票全称',
    enname       varchar(100) null comment '英文全称',
    cnspell      varchar(50)  null comment '拼音缩写',
    -- listing venue
    market       varchar(20)  null comment '市场类型(主板/创业板/科创板/CDR)',
    exchange     varchar(20)  null comment '交易所代码',
    curr_type    varchar(10)  null comment '交易货币',
    -- listing life cycle
    list_status  char         null comment '上市状态 L上市 D退市 P暂停上市',
    list_date    date         null comment '上市日期',
    delist_date  date         null comment '退市日期',
    is_hs        char         null comment '是否沪深港通标的 N否 H沪股通 S深股通',
    -- actual controller
    act_name     varchar(100) null comment '实控人名称',
    act_ent_type varchar(50)  null comment '实控人企业性质',
    -- bookkeeping columns
    source       varchar(15)  null,
    update_time  datetime     null default CURRENT_TIMESTAMP,
    primary key (ts_code)
)
    engine = InnoDB
    collate = utf8mb4_general_ci
    comment = 'Tushare股票基础信息表';
- 数据库操作记录表 update_log
最后建一个表用于存储数据库操作,方便排查错误
sql
-- Audit trail of update jobs: one row per logged update attempt.
create table update_log
(
    id           bigint      auto_increment comment '自增主键',
    -- what was updated
    table_name   varchar(50) not null comment '表名',
    ts_code      varchar(12) null comment '股票代码',
    start_date   char(8)     null comment '开始日期',
    end_date     char(8)     null comment '结束日期',
    -- outcome
    success_rows int         null default 0 comment '成功行数',
    failed_rows  int         null default 0 comment '失败行数',
    status       varchar(10) null comment '任务状态',
    message      text        null comment '错误或日志信息',
    created_at   datetime    null default CURRENT_TIMESTAMP comment '创建时间',
    primary key (id)
)
    engine = InnoDB
    collate = utf8mb4_general_ci
    comment = '数据更新日志表';
代码
下面是用于维护本地数据库的数据获取代码。目前只实现了全量获取,后续会开发增量更新。首先是导入相关的包。
python
import os
import numpy as np
import tushare as ts
from sqlalchemy import create_engine, inspect, text, Table, MetaData
from sqlalchemy.dialects.mysql import insert
from datetime import datetime, timedelta
import traceback
import time
创建一个基于 python 的 mysql 数据库维护类,首先实现通用的工具方法。
python
class TushareDBUpdater:
    """
    Generic Tushare -> MySQL updater.

    Offers resumable updates, update logging and alignment of DataFrame
    columns with the database columns.
    """

    def __init__(self, token, db_url, db_name="quantdb"):
        """Create the Tushare client and the SQLAlchemy engine.

        Args:
            token: token handed to ``ts.pro_api``.
            db_url: SQLAlchemy database URL handed to ``create_engine``.
            db_name: database name, stored on the instance as ``db_name``.
        """
        pro_client = ts.pro_api(token)
        db_engine = create_engine(db_url, pool_pre_ping=True)
        self.pro = pro_client
        self.engine = db_engine
        self.db_name = db_name
# ========== 工具方法 ==========
def write_log(self, table_name, ts_code, start_date, end_date, success_rows, failed_rows, status, message):
    """Insert one row describing an update attempt into the update_log table.

    Args:
        table_name: name of the table that was updated.
        ts_code: stock code the attempt refers to (callers may pass None).
        start_date: start of the requested range.
        end_date: end of the requested range.
        success_rows: number of rows written.
        failed_rows: number of rows that failed.
        status: status label stored with the row.
        message: free text; None becomes "" and only the first 500
            characters are stored.
    """
    # Normalise the message first so the bind parameters are plain values.
    stored_message = (message or "")[:500]
    bind_values = dict(
        table_name=table_name,
        ts_code=ts_code,
        start_date=start_date,
        end_date=end_date,
        success_rows=success_rows,
        failed_rows=failed_rows,
        status=status,
        message=stored_message,
    )
    insert_sql = text("""
INSERT INTO update_log (table_name, ts_code, start_date, end_date,
success_rows, failed_rows, status, message)
VALUES (:table_name, :ts_code, :start_date, :end_date,
:success_rows, :failed_rows, :status, :message)
""")
    # engine.begin() commits on normal exit of the with-block.
    with self.engine.begin() as connection:
        connection.execute(insert_sql, bind_values)
def get_last_trade_date(self, table_name):
    """Return MAX(trade_date) of ``table_name`` (used for resuming), or None.

    None is returned when the query yields no row or a falsy value.
    """
    # NOTE(review): table_name is interpolated into the SQL text, so it must
    # only ever come from trusted code, never from user input.
    with self.engine.connect() as connection:
        row = connection.execute(text(f"SELECT MAX(trade_date) FROM {table_name}")).fetchone()
        if not row:
            return None
        latest = row[0]
        if latest:
            return latest
        return None
def filter_df_columns(self, df, table_name):
    """Return a copy of ``df`` restricted to columns that exist in ``table_name``.

    Column order of ``df`` is kept. Columns missing from the table are
    dropped and reported with a printed warning.
    """
    table_columns = {column["name"] for column in inspect(self.engine).get_columns(table_name)}
    kept, dropped = [], []
    for column in df.columns:
        # Route each DataFrame column into the matching bucket.
        (kept if column in table_columns else dropped).append(column)
    if dropped:
        print(f"⚠️ 表 {table_name} 删除多余列 {dropped}")
    return df[kept].copy()
def safe_write(self, df, table_name):
    """Upsert ``df`` into ``table_name`` with INSERT ... ON DUPLICATE KEY UPDATE.

    Returns:
        ``(True, row_count)`` on success (``(True, 0)`` for None/empty input),
        or ``(False, error_message)`` when any exception is raised.
    """
    if df is None or df.empty:
        return True, 0
    try:
        # Reflect the table definition from the database.
        target = Table(table_name, MetaData(), autoload_with=self.engine)
        rows = df.to_dict(orient="records")
        if not rows:
            return True, 0
        base_insert = insert(target)
        # Key columns are left out of the UPDATE part of the upsert.
        key_columns = ("ts_code", "trade_date")
        assignments = {}
        for column in df.columns:
            if column in key_columns:
                continue
            assignments[column] = base_insert.inserted[column]
        statement = base_insert.on_duplicate_key_update(**assignments)
        # engine.begin() commits when the block exits without error.
        with self.engine.begin() as connection:
            connection.execute(statement, rows)
        return True, len(df)
    except Exception as exc:
        return False, str(exc)
实现通用更新器
python
# ========== 通用更新器 ==========
def update_table(self, table_name, api_func, api_kwargs=None, filter_cols=None, key_field="ts_code",
                 chunk_sleep=1.2, enable_resume=True, add_source=True, add_time=True):
    """
    Generic Tushare -> MySQL update routine, run once per stock code.

    Args:
        table_name: target table name.
        api_func: Tushare API callable, e.g. ``self.pro.daily``.
        api_kwargs: keyword arguments forwarded to ``api_func``. The dict is
            copied, so the caller's object is never modified.
        filter_cols: optional list of columns to keep from the API result.
        key_field: name of the keyword used to pass each stock code to
            ``api_func`` (default ``"ts_code"``).
        chunk_sleep: seconds to sleep after each stock (one extra second
            after an exception).
        enable_resume: when True, ``start_date`` is replaced by the day after
            ``MAX(trade_date)`` already stored in ``table_name``.
        add_source: add a ``source`` column set to ``"tushare"``.
        add_time: add an ``update_time`` column set to the current time.
    """
    # Work on a copy: the defaults written below must not leak to the caller.
    api_kwargs = dict(api_kwargs or {})
    print(f"\n🚀 开始更新 {table_name}")
    # Resume from the day after the latest stored trade date.
    if enable_resume:
        last_date = self.get_last_trade_date(table_name)
        if last_date:
            # MAX() over a DATE column is returned as a date object by the
            # driver; strings ("YYYYMMDD" or "YYYY-MM-DD") are accepted too.
            if isinstance(last_date, str):
                last_day = datetime.strptime(last_date.replace("-", "")[:8], "%Y%m%d")
            else:
                last_day = last_date
            api_kwargs["start_date"] = (last_day + timedelta(days=1)).strftime("%Y%m%d")
            print(f"🔁 断点续传,从 {api_kwargs['start_date']} 继续")
    # Default end of range: today. An explicit None counts as "not given"
    # (the update_* wrappers always pass the key, so setdefault is not enough).
    if api_kwargs.get("end_date") is None:
        api_kwargs["end_date"] = datetime.now().strftime("%Y%m%d")
    start_date, end_date = api_kwargs.get("start_date"), api_kwargs.get("end_date")
    # Nothing to fetch when the range is empty (e.g. the table is already up
    # to date); skip the per-stock API loop entirely.
    if start_date and end_date and str(start_date) > str(end_date):
        print(f"⏭️ {table_name} 已是最新,无需更新({start_date} > {end_date})")
        return
    all_stocks = self.pro.stock_basic(fields="ts_code")["ts_code"].tolist()
    success_total, fail_total = 0, 0
    for code in all_stocks:
        try:
            df = api_func(**{key_field: code}, **api_kwargs)
            if df is None or df.empty:
                self.write_log(table_name, code, start_date, end_date, 0, 0, "empty", "no data")
                continue
            if filter_cols:
                # copy() so the column assignments below hit an independent frame
                df = df[filter_cols].copy()
            if add_source:
                df["source"] = "tushare"
            if add_time:
                df["update_time"] = datetime.now()
            df = self.filter_df_columns(df, table_name)
            # inf / NaN are turned into None before the rows are written.
            df = df.replace([np.inf, -np.inf], np.nan)
            df = df.replace({np.nan: None})
            ok, result = self.safe_write(df, table_name)
            if ok:
                self.write_log(table_name, code, start_date, end_date, result, 0, "success", "ok")
                success_total += result
            else:
                # On failure safe_write returns the error text as `result`.
                self.write_log(table_name, code, start_date, end_date, 0, len(df), "failed", result)
                fail_total += len(df)
            time.sleep(chunk_sleep)
        except Exception:
            msg = traceback.format_exc()
            self.write_log(table_name, code, start_date, end_date, 0, 0, "failed", msg)
            time.sleep(chunk_sleep + 1)
    print(f"✅ {table_name} 更新完成,总成功 {success_total} 条,失败 {fail_total} 条。")
然后是对应每个表的更新方法。
python
# ========== 专用接口方法 ==========
# 更新 daily 表,历史日线
def update_daily(self, start_date="20240101", end_date=None, enable_resume=True):
    """Update the ``daily`` table (historical daily bars) via ``self.pro.daily``.

    ``start_date`` and ``end_date`` are forwarded as API keyword arguments;
    ``enable_resume`` is forwarded to ``update_table``.
    """
    date_range = {"start_date": start_date, "end_date": end_date}
    fetch = self.pro.daily
    self.update_table("daily", api_func=fetch, api_kwargs=date_range, filter_cols=None, enable_resume=enable_resume)
# 更新 adj_factor 表,复权因子
def update_adj_factor(self, start_date="20240101", end_date=None, enable_resume=True):
    """Update the ``adj_factor`` table (adjustment factors) via ``self.pro.adj_factor``.

    ``start_date`` and ``end_date`` are forwarded as API keyword arguments;
    ``enable_resume`` is forwarded to ``update_table``.
    """
    date_range = {"start_date": start_date, "end_date": end_date}
    fetch = self.pro.adj_factor
    self.update_table("adj_factor", api_func=fetch, api_kwargs=date_range, enable_resume=enable_resume)
# 更新 daily_basic 表,每日行情
def update_daily_basic(self, start_date="20240101", end_date=None, enable_resume=True):
    """Update the ``daily_basic`` table from a single ``pro.daily_basic`` call.

    Rows are upserted one by one so that a bad row only fails itself; each
    row is committed separately and a failed row is rolled back.

    TODO: a single API call does not return every date in the range
    (original author's note); the request still needs to be split.

    Args:
        start_date: start of the range passed to the API.
        end_date: end of the range passed to the API (None is passed through).
        enable_resume: accepted for signature parity with the other
            update_* methods; currently not used by this method.
    """
    print(f"🚀 更新 daily_basic 数据: {start_date} → {end_date or datetime.now().strftime('%Y%m%d')}")
    df = self.pro.daily_basic(start_date=start_date, end_date=end_date)
    if df is None or df.empty:
        print("⚠️ 无数据返回。")
        return
    df["source"] = "tushare"
    df["update_time"] = datetime.now()
    # inf / NaN are turned into None before the rows are written.
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.replace({np.nan: None})
    metadata = MetaData()
    table = Table("daily_basic", metadata, autoload_with=self.engine)
    success, failed = 0, 0
    # The with-block returns the connection to the pool even on errors.
    with self.engine.connect() as conn:
        for _, row in df.iterrows():
            data = row.to_dict()
            stmt = insert(table).values(**data)
            upsert = stmt.on_duplicate_key_update(
                **{col: stmt.inserted[col] for col in data.keys()}
            )
            try:
                conn.execute(upsert)
                conn.commit()
                success += 1
            except Exception:
                # Discard the failed statement so the next row starts clean.
                conn.rollback()
                failed += 1
                print(traceback.format_exc())
    # Report failures honestly instead of always logging "success".
    status = "success" if failed == 0 else "failed"
    message = "ok" if failed == 0 else f"{failed} rows failed"
    self.write_log("daily_basic", None, start_date, end_date, success, failed, status, message)
    print(f"✅ daily_basic 更新完成: 成功 {success} 条,失败 {failed} 条。")
# 更新 fina_indicator表,财务数据
def update_fina_indicator(self, start_date="20240101", end_date=None, enable_resume=True):
    """Update the ``fina_indicator`` table, one Tushare call per listed stock.

    The stock list comes from ``pro.stock_basic`` with ``list_status='L'``,
    because the fina_indicator API is queried with a ``ts_code``. A stock that
    returns no data, or whose API call raises, is skipped and the run goes on.
    Rows are upserted and committed one by one; a failed row is rolled back.

    NOTE(review): unlike ``update_table`` there is no sleep between API
    calls here -- confirm this stays within the account's rate limit.

    Args:
        start_date: start of the range passed to the API.
        end_date: end of the range passed to the API (None is passed through).
        enable_resume: accepted for signature parity with the other
            update_* methods; currently not used by this method.
    """
    stocks = self.pro.stock_basic(exchange='', list_status='L', fields='ts_code')
    metadata = MetaData()
    table = Table("fina_indicator", metadata, autoload_with=self.engine)
    success, failed = 0, 0
    for stock in stocks.itertuples(index=False):
        ts_code = stock.ts_code
        try:
            df = self.pro.fina_indicator(ts_code=ts_code, start_date=start_date, end_date=end_date)
        except Exception:
            # One failing API call must not abort the remaining stocks.
            self.write_log("fina_indicator", ts_code, start_date, end_date, 0, 0, "failed",
                           traceback.format_exc())
            continue
        if df is None or df.empty:
            # Move on to the next stock instead of ending the whole update.
            print("无数据返回!")
            continue
        df["source"] = "tushare"
        df["update_time"] = datetime.now()
        # inf / NaN are turned into None before the rows are written.
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.replace({np.nan: None})
        # A connection per stock; the with-block releases it even on errors.
        with self.engine.connect() as conn:
            for _, row in df.iterrows():
                data = row.to_dict()
                stmt = insert(table).values(**data)
                upsert = stmt.on_duplicate_key_update(
                    **{col: stmt.inserted[col] for col in data.keys()}
                )
                try:
                    conn.execute(upsert)
                    conn.commit()
                    success += 1
                except Exception:
                    # Discard the failed statement so the next row starts clean.
                    conn.rollback()
                    failed += 1
                    print(traceback.format_exc())
    # Report failures honestly instead of always logging "success".
    status = "success" if failed == 0 else "failed"
    message = "ok" if failed == 0 else f"{failed} rows failed"
    self.write_log("fina_indicator", None, start_date, end_date, success, failed, status, message)
    print(f"✅ fina_indicator 更新完成: 成功 {success} 条,失败 {failed} 条。")
def update_industry_data(self):
    """
    Update the ``industry_data`` table from ``pro.index_member_all``.

    The ``in_date`` / ``out_date`` values returned by the API are stored as
    they are; ``in_date`` falls back to today's date only when the API
    response has no such column. Rows are upserted and committed one by one;
    a failed row is rolled back and its traceback printed.

    TODO (original author's note): fetch the full stock list and query the
    industry classification per stock.
    NOTE(review): the table is keyed by ts_code alone, so if the API returns
    several membership rows for one stock the last one written wins -- confirm
    whether the rows should be filtered first.
    """
    print("🚀 更新 industry_data (行业分类)")
    try:
        df = self.pro.index_member_all()
        if df is None or df.empty:
            print("⚠️ 无数据返回。")
            self.write_log("industry_data", None, None, None, 0, 0, "empty", "no data")
            return
        # Keep the API's membership dates; overwriting them with today's date
        # would mark every stock as entering and leaving its industry today.
        if "in_date" not in df.columns:
            df["in_date"] = datetime.now().strftime("%Y%m%d")
        df["source"] = "tushare"
        df["update_time"] = datetime.now()
        # inf / NaN are turned into None before the rows are written.
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.replace({np.nan: None})
        metadata = MetaData()
        table = Table("industry_data", metadata, autoload_with=self.engine)
        success, failed = 0, 0
        # The with-block returns the connection to the pool even on errors.
        with self.engine.connect() as conn:
            for _, row in df.iterrows():
                data = row.to_dict()
                stmt = insert(table).values(**data)
                upsert = stmt.on_duplicate_key_update(**{c: stmt.inserted[c] for c in data.keys()})
                try:
                    conn.execute(upsert)
                    conn.commit()
                    success += 1
                except Exception:
                    # Discard the failed statement and show why it failed.
                    conn.rollback()
                    failed += 1
                    print(traceback.format_exc())
        # Report failures honestly instead of always logging "success".
        status = "success" if failed == 0 else "failed"
        message = "ok" if failed == 0 else f"{failed} rows failed"
        self.write_log("industry_data", None, None, None, success, failed, status, message)
        print(f"✅ 行业映射更新完成: 成功 {success}, 失败 {failed}")
    except Exception:
        msg = traceback.format_exc()
        self.write_log("industry_data", None, None, None, 0, 0, "failed", msg)
        print(f"❌ 更新失败: {msg}")
def update_stock_basic(self, start_date="20200101", end_date=None):
    """
    Update the ``stock_basic`` table from ``pro.stock_basic`` (list_status='L').

    Rows are upserted and committed one by one; a failed row is rolled back
    and its traceback printed.

    Args:
        start_date: accepted for signature parity with the other update_*
            methods; not used by this method.
        end_date: accepted for signature parity; not used by this method.
    """
    print("🚀 更新股票的基本数据")
    try:
        df = self.pro.stock_basic(exchange='', list_status='L')
        if df is None or df.empty:
            print("⚠️ 无数据返回。")
            self.write_log("stock_basic", None, None, None, 0, 0, "empty", "no data")
            return
        df["source"] = "tushare"
        df["update_time"] = datetime.now()
        # inf / NaN are turned into None before the rows are written.
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.replace({np.nan: None})
        metadata = MetaData()
        table = Table("stock_basic", metadata, autoload_with=self.engine)
        success, failed = 0, 0
        # The with-block returns the connection to the pool even on errors.
        with self.engine.connect() as conn:
            for _, row in df.iterrows():
                data = row.to_dict()
                stmt = insert(table).values(**data)
                upsert = stmt.on_duplicate_key_update(**{c: stmt.inserted[c] for c in data.keys()})
                try:
                    conn.execute(upsert)
                    conn.commit()
                    success += 1
                except Exception:
                    # Discard the failed statement and show why it failed.
                    conn.rollback()
                    failed += 1
                    print(traceback.format_exc())
        # Report failures honestly instead of always logging "success".
        status = "success" if failed == 0 else "failed"
        message = "ok" if failed == 0 else f"{failed} rows failed"
        self.write_log("stock_basic", None, None, None, success, failed, status, message)
        print(f"✅ 股票基本数据更新完成: 成功 {success}, 失败 {failed}")
    except Exception:
        msg = traceback.format_exc()
        self.write_log("stock_basic", None, None, None, 0, 0, "failed", msg)
        print(f"❌ 更新失败: {msg}")
最后我们可以使用main函数进行测试
python
# ========== 运行示例 ==========
if __name__ == "__main__":
    # Credentials are read from the environment when set, so real secrets do
    # not have to be written into the source; the literals are only the
    # original placeholder defaults.
    updater = TushareDBUpdater(
        token=os.environ.get("TUSHARE_TOKEN", "your_tushare_token"),
        db_url=os.environ.get(
            "QUANTDB_URL",
            "mysql+pymysql://root:123456@localhost:3306/quantdb?charset=utf8mb4",
        ),
    )
    # For testing run any one of these, or all of them:
    updater.update_adj_factor(start_date="19900101", end_date=None, enable_resume=False)
    updater.update_daily(start_date="19900101", end_date=None, enable_resume=False)
    updater.update_daily_basic(start_date="19900101", end_date=None, enable_resume=False)
    updater.update_fina_indicator(start_date="19900101", end_date=None, enable_resume=False)
    updater.update_industry_data()
    updater.update_stock_basic(start_date="19900101", end_date=None)
上面代码仍需要优化:daily_basic、fina_indicator 等方法是逐行执行 upsert 并逐行提交事务(每行一次 commit),通过 pymysql 写入大量数据时会很慢,后续应改为批量写入。