Python:检查 CSV 数据行的列数是否与表头字段数一致,不一致则删除该数据行,以便能够正常存储
python
import pandas as pd
import time
# csv_file_path = 'Q603887-QhXQStockHisJiaoYiFlow.CSV'
def QhOpenCsvColCount(QhFlePath,Qhencoding="gbk"):
    """
    Load a CSV file, keeping only the rows whose field count matches the header.

    The first record is used as the header. Every later record is parsed with
    the standard ``csv`` reader, so a quoted field may itself contain commas
    or line breaks, and the record is dropped when its number of fields
    differs from the header's. All values are kept as strings.

    Args:
        QhFlePath: Path of the CSV file to read.
        Qhencoding: Text encoding used to decode the file. Bytes that cannot
            be decoded are silently ignored.

    Returns:
        A pandas DataFrame built from the header and the surviving rows.
        An empty DataFrame is returned when the file has no header record.

    Author: Que Hui (阙辉)
    """
    import csv  # local import: the file-level imports do not provide csv

    # newline='' lets the csv reader handle line endings inside quoted fields.
    with open(QhFlePath, 'r', encoding=Qhencoding, errors="ignore", newline='') as Qhf:
        QhReader = csv.reader(Qhf)
        QhHeaderF = next(QhReader, None)
        if not QhHeaderF:
            # Empty file or blank first line: there is no header to compare against.
            return pd.DataFrame()
        QhHeaderCloumns = len(QhHeaderF)
        QhDataList = [QhRow for QhRow in QhReader if len(QhRow) == QhHeaderCloumns]
    return pd.DataFrame(QhDataList, columns=QhHeaderF)
# time.sleep(10)
# print(QhDataList)
# print(QhHeaderF)
# Demo: clean the sample CSV in place. Load only the rows whose column count
# matches the header, print them, then overwrite the same file with the result.
QhFlePath = 'Q603887-QhXQStockHisJiaoYiFlow.CSV'
aa = QhOpenCsvColCount(QhFlePath, "gbk")
print(aa)
aa.to_csv(QhFlePath, index=False, encoding="gbk")
应用:在存储方法中,当 pd.read_csv 读取旧 CSV 失败时,改用 QhOpenCsvColCount 打开
python
def _QhReadCsvTolerant(QhCsvPath):
    """Read a GBK CSV with pandas, falling back to QhOpenCsvColCount on failure."""
    try:
        return pd.read_csv(QhCsvPath,encoding='gbk',low_memory=False)
    except Exception:
        # Usually caused by rows whose column count differs from the header;
        # reopen the file while dropping those rows.
        return QhOpenCsvColCount(QhFlePath=QhCsvPath,Qhencoding="gbk")


@QhStarEndTime
def _QhDBToCsv(QhCsvPath,QhUniqueValue,QhJieGuoDf="",QhDateSort="",
               QhIsCsv=True,QhIsMd5=False,QhRearCloumn=""):
    """
    Store a result DataFrame to a GBK-encoded CSV, merging with any existing file.

    Args:
        QhCsvPath: Path of the CSV file to write.
        QhUniqueValue: Unique-key specification; passed to QhDfWeiYiZhi and
            QhPdCsvUnique and used as the ``subset`` of the final
            ``drop_duplicates`` call.
        QhJieGuoDf: New data to store. A str value (the default "") is treated
            as "not a DataFrame", in which case the merge with the existing
            file is skipped.
        QhDateSort: Field name handed to QhDfDateSort; "" skips date sorting.
        QhIsCsv: When false, nothing is stored and QhJieGuoDf is returned as is.
        QhIsMd5: Forwarded to QhDfWeiYiZhi. When true and the CSV already
            exists, de-duplication keys on the "唯一值" column instead.
        QhRearCloumn: Column names giving the final column order; "" skips
            reordering. The "唯一值" column is put first when it is missing
            from the list. The caller's list is not modified.

    Returns:
        The DataFrame that was written. If any step fails, the error is logged
        and the current contents of the existing CSV are returned instead, or
        QhJieGuoDf as it stood at the time of failure when no CSV exists.
    """
    # NOTE(review): the nesting below was reconstructed from a copy of this
    # function whose indentation had been lost - confirm against the original.
    QhWeiYiZhiName = "唯一值"  # name of the unique-key column
    try:
        if QhIsCsv:
            # Build the combined unique-key value for the new data.
            QhJieGuoDf = QhDfWeiYiZhi(QhJieGuoDf,QhUniqueValue,QhIsMd5=QhIsMd5)
            if os.path.exists(QhCsvPath):
                # The CSV exists: read it and merge, keeping the newest value
                # (update existing keys, append new ones).
                QhOldCsvDf = _QhReadCsvTolerant(QhCsvPath)
                if not isinstance(QhJieGuoDf,str):  # a str means "no DataFrame"
                    if QhIsMd5:
                        # MD5 de-duplication keys on the unique-key column.
                        QhUniqueValue = [QhWeiYiZhiName]
                    QhJieGuoDf = QhPdCsvUnique(QhOldCsvDf,QhJieGuoDf,QhUniqueValue)
            if QhDateSort != "":
                # Try the date-only format first, then the date-time format.
                try:
                    QhDfDateSort(QhJieGuoDf,QhFieldName=QhDateSort,QhFormat="%Y-%m-%d")
                except Exception:
                    QhDfDateSort(QhJieGuoDf,QhFieldName=QhDateSort,QhFormat="%Y-%m-%d %H:%M:%S")
            if QhRearCloumn != "":
                # Work on a copy so the caller's list is never mutated.
                QhColumnOrder = list(QhRearCloumn)
                if QhWeiYiZhiName not in QhColumnOrder:
                    QhColumnOrder.insert(0,QhWeiYiZhiName)
                QhJieGuoDf = QhJieGuoDf[QhColumnOrder]
            # De-duplicate keeping the newest row, so the very first write is
            # unique as well.
            QhJieGuoDf = QhJieGuoDf.drop_duplicates(subset=QhUniqueValue,keep='last',ignore_index=True)
            QhJieGuoDf.to_csv(QhCsvPath,encoding='gbk',index=False,errors= 'ignore')
        return QhJieGuoDf
    except Exception:
        QhErrMsg = traceback.format_exc()
        logger.error("【存储CSV】存储失败,报错消息\n{QhErrMsg}!QueHui!".format(QhErrMsg=QhErrMsg))
        # On failure fall back to the historical data already on disk.
        if os.path.exists(QhCsvPath):
            QhJieGuoDf = _QhReadCsvTolerant(QhCsvPath)
        return QhJieGuoDf