【python实战】-- 解压&提取所有指定文件的指定内容

系列文章目录

文章目录


前言

一、需求与程序

1、需求

指定目录下有若干压缩文件(.zip,可能存在多层嵌套);

需要先将它们批量解压;

然后汇总路径中包含指定字符的所有 CSV 文件中的指定数据列。


2、程序

python 复制代码
import os
import shutil
import zipfile
import pandas as pd
import xlrd
import xlwt
import csv
from xlutils.copy import copy
from openpyxl import Workbook
from openpyxl import load_workbook
from os.path import dirname
from decimal import Decimal
from openpyxl.utils.dataframe import dataframe_to_rows
# 读写2007 excel
import openpyxl
from openpyxl.styles import numbers
from openpyxl.styles import Alignment
import glob
import tkinter as tk
from tkinter import messagebox
from tkinter import simpledialog

# Directory to process; the user is prompted for it as soon as this module runs.
zippath = input("请输入需解压的文件路径:\n")
# Name under which the rest of the script refers to that directory.
parent_path = zippath

# File-name suffix that marks an archive which still has to be extracted.
file_flag = '.zip'
def del_old_zip(file_path):
    """Delete the file located at ``file_path``.

    Raises whatever ``OSError`` the operating system reports (for example
    when the file does not exist).
    """
    os.unlink(file_path)
def decompress(file_path, root, flag=None):
    """Extract the zip archive ``file_path`` into the directory ``root``.

    Args:
        file_path: Path of the zip archive to read.
        root: Directory the members are extracted into.
        flag: File-name suffix that identifies a nested archive. When left
            as ``None`` the module-level ``file_flag`` is used.

    Returns:
        ``1`` if at least one member name ends with the suffix (the
        archive contained another archive), otherwise ``0``.
    """
    if flag is None:
        flag = file_flag
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(file_path, "r") as archive:
        archive.extractall(path=root)
        has_nested = any(member.endswith(flag) for member in archive.namelist())
    return 1 if has_nested else 0

def start_dir_make(root, dirname):
    """Create the folder ``dirname`` inside ``root`` and return its path.

    The folder is created through its joined path, so the process working
    directory is left untouched, and an already existing folder is reused
    instead of raising ``FileExistsError``.

    Args:
        root: Directory that will contain the new folder.
        dirname: Name of the folder to create.

    Returns:
        ``os.path.join(root, dirname)``.
    """
    target = os.path.join(root, dirname)
    os.makedirs(target, exist_ok=True)
    return target

def rem_dir_extra(root, father_dir_name):
    """Collapse redundant ``name/name`` folder nesting below ``root``.

    When ``root/father_dir_name`` contains nothing but a single folder
    that carries the same name, the inner folder is moved up to replace
    the outer one. Every sub-folder is then processed recursively.

    All paths are built explicitly, so the process working directory is
    never changed. File-system errors are reported with ``print`` and stop
    the processing of the current folder only.

    Args:
        root: Directory that contains ``father_dir_name``.
        father_dir_name: Name of the folder to inspect.
    """
    try:
        parent = os.path.join(root, father_dir_name)
        entries = os.listdir(parent)
        for item in entries:
            if not os.path.isdir(os.path.join(parent, item)):
                continue
            if item == father_dir_name and len(entries) == 1:
                # Park the outer folder under a temporary name so the inner
                # folder can take over its original location.
                staging = os.path.join(root, father_dir_name + '-old')
                os.rename(parent, staging)
                shutil.move(os.path.join(staging, item), os.path.join(root, item))
                os.rmdir(staging)
                rem_dir_extra(root, item)
            else:
                rem_dir_extra(parent, item)
    except OSError as e:
        print("清除文件夹出错"+str(e))

def get_allfile_msg(file_dir):
    """Describe the top level of ``file_dir`` (sub-folders are not entered).

    Args:
        file_dir: Directory to inspect.

    Returns:
        A tuple ``(root, dirs, files)`` where ``dirs`` lists the immediate
        sub-folders and ``files`` the immediate files whose name ends with
        ``.xls``, ``.xlsx`` or ``.csv``. When the directory cannot be
        walked (for example because it does not exist) the result is
        ``(file_dir, [], [])`` so callers can always unpack three values.
    """
    top_level = next(os.walk(file_dir), None)
    if top_level is None:
        return file_dir, [], []
    root, dirs, files = top_level
    sheets = [name for name in files if name.endswith(('.xls', '.xlsx', '.csv'))]
    return root, dirs, sheets

def get_allfile_url(root, files):
    """Return every name in ``files`` prefixed with ``root`` and a ``/``.

    Args:
        root: Directory prefix.
        files: Iterable of file names.

    Returns:
        A new list with one ``root/name`` string per entry, in input order.
    """
    return [root + "/" + entry for entry in files]

def get_file_name(path, suffix=('.xlsx', '.xls', '.csv')):
    """Recursively collect the files below ``path`` that match ``suffix``.

    Args:
        path: Directory to walk.
        suffix: A single suffix string or an iterable of suffixes; only
            files whose name ends with one of them are returned.

    Returns:
        A list of file paths, each joined with the folder it was found in.
    """
    # ``str.endswith`` needs a tuple when several suffixes are allowed.
    wanted = (suffix,) if isinstance(suffix, str) else tuple(suffix)
    matches = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            if name.endswith(wanted):
                matches.append(os.path.join(root, name))
    return matches

def extract_last_part_of_path(path):
    """Return the final component of ``path``.

    The result is an empty string when ``path`` ends with a separator.
    """
    _head, tail = os.path.split(path)
    return tail

# Read one CSV file with pandas.
def read_csv_file(file_path):
    """Load the CSV file ``file_path`` into a DataFrame of raw cell values.

    The file is decoded as latin-1, split on commas, quoted with ``'`` and
    its second line is used as the header row. Every column keeps the
    ``object`` dtype, so no numeric conversion happens here.

    Only the comma separator is passed: supplying both ``sep`` and
    ``delimiter`` makes current pandas raise ``ValueError``.

    Args:
        file_path: Path of the CSV file.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(
        file_path,
        encoding='latin1',
        sep=',',
        dtype=object,
        quotechar="'",
        doublequote=True,
        engine="python",
        header=1,  # the second line holds the column names
    )

# Summary layout per mode:
# mode -> (first CSV column, one-past-last CSV column, titles written to row 6).
_SUMMARY_MODES = {
    "S1": (1, 4, ['data1', 'data2', 'data3']),
    "S2": (4, 7, ['data4', 'data5', 'data6']),
    "S1S2": (1, 7, ['data1', 'data2', 'data3', 'data4', 'data5', 'data6']),
}

# Judgement bands as (start, end, limit): a data column is "OK" only when its
# maximum inside every band does not exceed that band's limit.
_WAVE_BANDS = ((430, 530, 1.5), (550, 780, 1.1))

_FIRST_WAVE = 380      # first value written to the wave column
_LAST_WAVE = 1050      # last value written to the wave column
_DATA_START_ROW = 7    # sheet row that carries _FIRST_WAVE and the first data row
# NOTE(review): band slices are shifted by two data rows and exclude the band
# end; both are kept exactly as in the original script - confirm against the
# real CSV layout.
_BAND_ROW_SHIFT = 2


def _extract_all_archives(top):
    """Extract every archive below ``top``, repeating while nested ones appear.

    Each archive is unpacked into a sibling folder named after it, deleted,
    and the resulting folder is cleaned of redundant nesting. Another pass
    is made whenever any archive of the current pass contained a further
    archive, and the function returns after the first pass without one, so
    a tree without archives ends immediately.
    """
    while True:
        nested_found = False
        for root, _dirs, files in os.walk(top):
            for name in files:
                if not name.endswith(file_flag):
                    continue
                stem = name.replace(file_flag, "")
                new_ws = start_dir_make(root, stem)
                zip_path = os.path.join(root, name)
                if decompress(zip_path, new_ws):
                    nested_found = True
                del_old_zip(zip_path)
                rem_dir_extra(root, stem)
                print(f'{root}\\{name}'.join(['文件:','\n解压完成\n']))
        if not nested_found:
            return


def _init_summary_sheet(ws, alignment):
    """Write the fixed first column: row labels and the wave values."""
    for row, label in ((1, '判定'), (5, '文件名'), (6, 'wave')):
        cell = ws.cell(row=row, column=1)
        cell.value = label
        cell.alignment = alignment
    for wave in range(_FIRST_WAVE, _LAST_WAVE + 1):
        cell = ws.cell(_DATA_START_ROW + wave - _FIRST_WAVE, 1)
        cell.value = wave
        cell.alignment = alignment


def _write_file_summary(ws, df, file_label, file_index, layout, alignment):
    """Write the judgement, titles and data of one CSV file to the sheet.

    ``layout`` is an entry of ``_SUMMARY_MODES``; ``file_index`` is the
    zero-based position of the file and selects the column group used.
    """
    first_col, end_col, titles = layout
    width = end_col - first_col
    base = 2 + width * file_index  # first sheet column of this file's group
    data = df.iloc[:, first_col:end_col].astype(float)

    (w1_start, w1_end, w1_limit), (w2_start, w2_end, w2_limit) = _WAVE_BANDS
    ws.cell(row=2, column=1).value = f'{w1_start},{w1_end},{w1_limit}'
    ws.cell(row=3, column=1).value = f'{w2_start},{w2_end},{w2_limit}'

    # Judgement rows: verdict in row 1, band maxima in rows 2 and 3.
    lo1 = w1_start - _FIRST_WAVE + _BAND_ROW_SHIFT
    hi1 = w1_end - _FIRST_WAVE + _BAND_ROW_SHIFT
    lo2 = w2_start - _FIRST_WAVE + _BAND_ROW_SHIFT
    hi2 = w2_end - _FIRST_WAVE + _BAND_ROW_SHIFT
    for n in range(width):
        cal1 = data.iloc[lo1:hi1, n].max()
        cal2 = data.iloc[lo2:hi2, n].max()
        verdict = ws.cell(row=1, column=base + n)
        verdict.value = "OK" if cal1 <= w1_limit and cal2 <= w2_limit else "NG"
        verdict.alignment = alignment
        ws.cell(row=2, column=base + n).value = cal1
        ws.cell(row=3, column=base + n).value = cal2

    # File name above the group, then one title per data column.
    ws.cell(row=5, column=base).value = file_label
    for k, title in enumerate(titles):
        cell = ws.cell(row=6, column=base + k)
        cell.value = title
        cell.alignment = alignment

    # Raw data, one sheet row per DataFrame row.
    for i, row in data.iterrows():
        for j, value in enumerate(row):
            ws.cell(row=i + _DATA_START_ROW, column=base + j).value = value


if __name__ == '__main__':
    # Step 1: unpack every archive (including nested ones) under parent_path.
    _extract_all_archives(parent_path)
    rem_dir_extra(os.path.split(parent_path)[0],os.path.split(parent_path)[1])
    print("解压完成,请检查!!")
    print("请输入汇总需求,S1或S2或S1S2")

    # Step 2: prepare the summary workbook.
    wb = Workbook()
    ws = wb.active
    ws.title="Summary"
    # Every cell written with this alignment is centred in both directions.
    alignment = Alignment(horizontal='center',vertical='center')
    _init_summary_sheet(ws, alignment)

    file_dir = parent_path
    # The workbook is saved next to this script.
    current_path = os.path.dirname(os.path.abspath(__file__))

    # Step 3: summarise every matching CSV file.
    user_input = input("请输入S1或S2或S1S2\n")
    layout = _SUMMARY_MODES.get(user_input)
    count = 0
    for root, _dirs, _files in os.walk(file_dir):
        # Escape the folder so characters such as "[" are not read as a pattern.
        for file_path in glob.glob(os.path.join(glob.escape(root), '*.csv')):
            if '39526A-905' not in file_path or 'Add' in file_path:
                continue
            print(file_path)
            count += 1
            print(f"共发现 {count} 个文件!")
            if layout is None:
                print("非指定指令")
                continue
            try:
                csv_data = read_csv_file(file_path)
                _write_file_summary(
                    ws,
                    csv_data,
                    extract_last_part_of_path(file_path),
                    count - 1,
                    layout,
                    alignment,
                )
            except Exception as e:
                # Best effort: report the problem and go on with the next file.
                print(e)
    output_file_path=os.path.join(current_path,'SummaryoutS1S2.xlsx')
    wb.save(output_file_path)

总结

分享:

接受可以让我面对所有的问题。当我感到焦虑的时候,通常是因为我发现自己不能接受生活中的某些人、地方或事情;直到我完全接受了它们,我才能获得心灵上的安宁。除非我完全地接受生活,否则我将无法获得快乐。我不需要再纠结这个世界上有什么需要改变,而是要关注我自己的态度需要发生怎样的改变。

相关推荐
m0_748554815 小时前
golang如何实现用户订阅偏好管理_golang用户订阅偏好管理实现总结
jvm·数据库·python
smj2302_796826525 小时前
解决leetcode第3911题.移除子数组元素后第k小偶数
数据结构·python·算法·leetcode
阿正呀6 小时前
Redis怎样实现本地缓存的高效失效通知
jvm·数据库·python
九转成圣6 小时前
Java 性能优化实战:如何将海量扁平数据高效转化为类目字典树?
java·开发语言·json
SmartRadio6 小时前
ESP32-S3 双模式切换实现:兼顾手机_路由器连接与WiFi长距离通信
开发语言·网络·智能手机·esp32·长距离wifi
2501_901200536 小时前
mysql如何设置InnoDB引擎参数_优化innodb_buffer_pool
jvm·数据库·python
laowangpython6 小时前
Rust 入门:GitHub 热门内存安全编程语言
开发语言·其他·rust·github
我叫汪枫6 小时前
在后台管理系统中,如何递归和选择保留的思路来过滤菜单
开发语言·javascript·node.js·ecmascript
_.Switch7 小时前
东方财富股票数据JS逆向:secids字段和AES加密实战
开发语言·前端·javascript·网络·爬虫·python·ecmascript
软件技术NINI7 小时前
webkit简介及工作流程
开发语言·前端·javascript·udp·ecmascript·webkit·yarn