使用Python查找大文件的实用脚本

C盘满了,于是写了一个Python脚本,2分多钟就能找出比较大的文件,然后手动删除或者迁移到D盘。最后发现是微信小程序开发工具的缓存文件太多了,清理后腾出了10 GB空间,念头通达了。这里备份一下脚本。

运行工具:PyCharm 2024.1.3 (Community Edition)

【完整代码】

python 复制代码
import os
import threading
import time
import sys
from threading import Event

def is_large(file_path, threshold_mb):
    """Return True if the file is strictly larger than ``threshold_mb`` MiB.

    Args:
        file_path: Path of the file whose size is checked.
        threshold_mb: Size limit in units of 1024 * 1024 bytes.

    Raises:
        OSError: Propagated from ``os.path.getsize`` when the file cannot
            be stat'ed.
    """
    bytes_per_mb = 1024 * 1024
    size_in_mb = os.path.getsize(file_path) / bytes_per_mb
    return size_in_mb > threshold_mb

def show_loading_animation(stop_event, interval=0.5):
    """Show a simple text loading animation until ``stop_event`` is set.

    The frames '.', '..', '...', '....' are cycled on a single console line
    for as long as the event stays unset, so the animation lasts exactly as
    long as the work it accompanies.

    Args:
        stop_event: A ``threading.Event``; setting it ends the animation
            promptly, even in the middle of a frame.
        interval: Seconds each frame stays on screen. Non-positive values
            fall back to 0.5 to avoid a busy loop.
    """
    loading_chars = ['.', '..', '...', '....']
    frame_width = len(loading_chars[-1])
    if interval <= 0:
        interval = 0.5

    while not stop_event.is_set():
        for char in loading_chars:
            # Pad to the widest frame so a shorter frame overwrites the dots
            # left behind by the previous, longer one after '\r'.
            sys.stdout.write('\r正在查找大文件... ' + char.ljust(frame_width))
            sys.stdout.flush()
            # Event.wait doubles as the frame delay and returns True as soon
            # as a stop is requested, so shutdown is not delayed by a sleep.
            if stop_event.wait(interval):
                break

    # Finish the animation line so later output starts on a fresh line.
    sys.stdout.write('\n')
    sys.stdout.flush()

def filter_files(files, skip_file_keywords, include_file_keywords, extension=None):
    """Filter a list of file names by keyword and extension.

    Args:
        files: Iterable of file names (not full paths).
        skip_file_keywords: Names containing any of these substrings are
            dropped. ``None`` or an empty list disables the check.
        include_file_keywords: When non-empty, only names containing at
            least one of these substrings are kept.
        extension: Optional extension without the leading dot; when given,
            only names ending in ``'.' + extension`` are kept.

    Returns:
        A new list with the names that pass every active filter, in their
        original order.
    """
    # An empty keyword is a substring of every name, so [''] (what
    # ''.split(',') produces) would reject or accept everything. Drop them.
    skip_keywords = [kw for kw in (skip_file_keywords or ()) if kw]
    include_keywords = [kw for kw in (include_file_keywords or ()) if kw]
    suffix = None if extension is None else '.' + extension

    kept = []
    for name in files:
        if any(kw in name for kw in skip_keywords):
            continue
        if include_keywords and not any(kw in name for kw in include_keywords):
            continue
        if suffix is not None and not name.endswith(suffix):
            continue
        kept.append(name)
    return kept

def filter_dirs(dirs, skip_dir_keywords, include_dir_keywords):
    """Filter a list of directory names by keyword.

    Args:
        dirs: Iterable of directory names (not full paths).
        skip_dir_keywords: Names containing any of these substrings are
            dropped. ``None`` or an empty list disables the check.
        include_dir_keywords: When non-empty, only names containing at
            least one of these substrings are kept.

    Returns:
        A new list with the directory names that pass both filters, in
        their original order.
    """
    # An empty keyword is a substring of every name, so [''] (what
    # ''.split(',') produces) would prune every directory. Drop them.
    skip_keywords = [kw for kw in (skip_dir_keywords or ()) if kw]
    include_keywords = [kw for kw in (include_dir_keywords or ()) if kw]

    kept = []
    for name in dirs:
        if any(kw in name for kw in skip_keywords):
            continue
        if include_keywords and not any(kw in name for kw in include_keywords):
            continue
        kept.append(name)
    return kept

def get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension=None, interval=0.5):
    """Find every file under ``dir_path`` larger than ``threshold_mb`` MiB.

    Walks the tree with ``os.walk``, pruning directories through
    ``filter_dirs`` and selecting files through ``filter_files``, while a
    background thread runs ``show_loading_animation``. Afterwards the matches
    are printed, largest first, followed by the elapsed time.

    Args:
        dir_path: Root directory to scan.
        threshold_mb: Size limit in MiB; only strictly larger files match.
        skip_dir_keywords: Passed to ``filter_dirs`` as the skip list.
        skip_file_keywords: Passed to ``filter_files`` as the skip list.
        include_dir_keywords: Passed to ``filter_dirs`` as the include list.
        include_file_keywords: Passed to ``filter_files`` as the include list.
        extension: Optional file extension passed to ``filter_files``.
        interval: Animation interval passed to ``show_loading_animation``.

    Returns:
        The list of ``{'path': str, 'size': float}`` dicts (size in MiB),
        sorted by size in descending order.
    """
    start_time = time.time()
    stop_event = Event()
    large_files = []
    loading_thread = threading.Thread(target=show_loading_animation, args=(stop_event, interval))
    loading_thread.daemon = True
    loading_thread.start()

    try:
        for root, dirs, files in os.walk(dir_path):
            # Assigning in place tells os.walk which sub-directories to visit.
            dirs[:] = filter_dirs(dirs, skip_dir_keywords, include_dir_keywords)
            for file in filter_files(files, skip_file_keywords, include_file_keywords, extension):
                full_path = os.path.join(root, file)
                try:
                    # Stat once and reuse the value for both the comparison
                    # and the report, instead of querying the file twice.
                    size_mb = os.path.getsize(full_path) / (1024 * 1024)
                except OSError as e:
                    # Unreadable or vanished files are reported and skipped.
                    print(f"警告访问文件出错 {full_path} 出错信息: {e}")
                    continue
                if size_mb > threshold_mb:
                    large_files.append({'path': full_path, 'size': size_mb})
    finally:
        # Always signal the animation thread and wait for it, even on error.
        stop_event.set()
        loading_thread.join()

    large_files.sort(key=lambda x: x['size'], reverse=True)
    for file_info in large_files:
        print(f"文件路径: {file_info['path']} | 文件大小: {file_info['size']:.2f} MB")

    end_time = time.time()
    print(f"\n查找共耗时: {end_time - start_time:.2f} 秒")
    return large_files

def _parse_keywords(raw):
    """Split a comma-separated string into stripped, non-empty keywords."""
    # Accept the full-width comma as well, since the prompts are in Chinese.
    parts = raw.replace(',', ',').split(',')
    return [part.strip() for part in parts if part.strip()]


def main():
    """Prompt for the scan settings and run the large-file search.

    Reads the directory, the size threshold, the four keyword lists and an
    optional extension from standard input, then calls
    ``get_all_large_files_with_loading``. An invalid threshold, a missing
    directory and ``OSError`` are reported on the console instead of raised.
    """
    dir_path = input("请输入要检查的目录路径: ").strip()
    # os.walk silently yields nothing for a missing path, which would look
    # like "no large files found"; report the problem instead.
    if not os.path.isdir(dir_path):
        print(f"错误:目录不存在或不是文件夹: {dir_path}")
        return
    try:
        threshold_mb = float(input("请输入文件大小阈值(单位: MB): "))
        # ''.split(',') gives [''], and '' is a substring of every name, so
        # pressing Enter used to filter out everything. _parse_keywords
        # returns [] for empty input, which disables the filter.
        skip_dir_keywords = _parse_keywords(input("请输入要跳过的文件夹名关键词,用逗号分隔(直接回车跳过,推荐modules,~~,.gradle): "))
        skip_file_keywords = _parse_keywords(input("请输入要跳过的文件名关键词,用逗号分隔(直接回车跳过,推荐$): "))
        include_dir_keywords = _parse_keywords(input("请输入要包含的文件夹名关键词,用逗号分隔(直接回车跳过): "))
        include_file_keywords = _parse_keywords(input("请输入要包含的文件名关键词,用逗号分隔(直接回车跳过): "))
        extension = input("请输入要筛选的文件扩展名(例如:txt,可选,直接回车跳过): ").strip().strip('.') or None
        get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension)
        print("搜索结束.")
    except ValueError:
        print("错误:请输入有效的数字作为文件大小阈值.")
    except OSError as e:
        print(e)

# Run the interactive search only when executed as a script, not on import.
if __name__ == '__main__':
    main()
相关推荐
AC赳赳老秦4 分钟前
OpenClaw email技能:批量发送邮件、自动回复,高效处理工作邮件
运维·人工智能·python·django·自动化·deepseek·openclaw
zhaoshuzhaoshu13 分钟前
Python 语法之数据结构详细解析
python
AI问答工程师39 分钟前
Meta Muse Spark 的"思维压缩"到底是什么?我用 Python 复现了核心思路(附代码)
人工智能·python
zfan5202 小时前
python对Excel数据处理(1)
python·excel·pandas
小饕2 小时前
我从零搭建 RAG 学到的 10 件事
python
老歌老听老掉牙2 小时前
PyQt5+Qt Designer实战:可视化设计智能参数配置界面,告别手动布局时代!
python·qt
格鸰爱童话2 小时前
向AI学习项目技能(六)
java·人工智能·spring boot·python·学习
悟空爬虫-彪哥2 小时前
VRChat开发环境配置,零基础教程
python
数据知道3 小时前
《 Claude Code源码分析与实践》专栏目录
python·ai·github·claude code·claw code
曲幽3 小时前
FastAPI+Vue:文件分片上传+秒传+断点续传,这坑我帮你踩平了!
python·vue·upload·fastapi·web·blob·chunk·spark-md5