Python Minio 工具类封装

最近因为需要对大规模的文件进行存储,调研了多种对象存储方案,最终选择了 MinIO。为了方便 Python 调用,在 minio 第三方包的基础上进行了进一步封装。该工具除基础功能外,还封装了多线程分片下载和上传文件的能力。注意切片大小不宜设置过大:切片下载受限于机器带宽,过大会把带宽占满,影响机器性能。分享的代码仅供学习使用。

完整代码如下:
import os
import io
from minio import Minio
from minio.error import S3Error
from datetime import timedelta
from tqdm import tqdm
from minio.deleteobjects import DeleteObject
from concurrent.futures import as_completed, ThreadPoolExecutor


class Bucket(object):
    """Singleton helper around the MinIO SDK client.

    Wraps common bucket/object operations and adds thread-pooled,
    ranged ("sliced") downloads via ``get_file_object``.  Keep the
    slice size moderate: slices are fetched concurrently, so large
    slices can saturate the machine's bandwidth.
    """

    # Singleton instance, set by __new__.  NOTE: __init__ later assigns the
    # Minio SDK client to the *instance* attribute ``self.client``, which
    # shadows this class attribute on the instance itself.
    client = None
    # Anonymous read-only bucket policy template; both %s slots receive the
    # bucket name.
    policy = '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"AWS":["*"]},"Action":["s3:GetBucketLocation","s3:ListBucket"],"Resource":["arn:aws:s3:::%s"]},{"Effect":"Allow","Principal":{"AWS":["*"]},"Action":["s3:GetObject"],"Resource":["arn:aws:s3:::%s/*"]}]}'

    def __new__(cls, *args, **kwargs):
        # Classic singleton: reuse the first instance ever created.
        if not cls.client:
            cls.client = object.__new__(cls)
        return cls.client

    def __init__(self, service, access_key, secret_key, secure=False, section_size=10, t_max=3):
        """
        Initialize the wrapper.

        :param service: MinIO server address ("host:port")
        :param access_key: access key
        :param secret_key: secret key
        :param secure: use HTTPS when True
        :param section_size: slice size in MB for ranged downloads
        :param t_max: thread pool size for concurrent slice fetches
        """
        self.service = service
        self.client = Minio(service, access_key=access_key, secret_key=secret_key, secure=secure)
        self.size = section_size * 1024 * 1024  # slice size in bytes
        self.processPool = ThreadPoolExecutor(max_workers=t_max)

    def exists_bucket(self, bucket_name):
        """
        Check whether a bucket exists.

        :param bucket_name: bucket name
        :return: True if the bucket exists
        """
        return self.client.bucket_exists(bucket_name=bucket_name)

    def create_bucket(self, bucket_name: str, is_policy: bool = True):
        """
        Create a bucket and optionally attach the anonymous read policy.

        :param bucket_name: bucket name
        :param is_policy: attach the public read policy when True
        :return: False if the bucket already existed, True otherwise
        """
        if self.exists_bucket(bucket_name=bucket_name):
            return False
        self.client.make_bucket(bucket_name=bucket_name)
        if is_policy:
            policy = self.policy % (bucket_name, bucket_name)
            self.client.set_bucket_policy(bucket_name=bucket_name, policy=policy)
        return True

    def get_bucket_list(self):
        """
        List all buckets.

        :return: list of {"bucket_name": ..., "create_time": ...} dicts
        """
        return [
            {"bucket_name": bucket.name, "create_time": bucket.creation_date}
            for bucket in self.client.list_buckets()
        ]

    def remove_bucket(self, bucket_name):
        """
        Delete a bucket.

        :param bucket_name: bucket name
        :return: True on success, False on S3 error
        """
        try:
            self.client.remove_bucket(bucket_name=bucket_name)
        except S3Error as e:
            print("[error]:", e)
            return False
        return True

    def bucket_list_files(self, bucket_name, prefix):
        """
        Print every object in a bucket under the given prefix.

        :param bucket_name: bucket name
        :param prefix: object-name prefix filter
        """
        try:
            objects = self.client.list_objects(bucket_name=bucket_name, prefix=prefix, recursive=True)
            for obj in objects:
                # Print the object name as text; the old .encode('utf-8')
                # produced a bytes repr like b'...'.
                print(obj.bucket_name, obj.object_name, obj.last_modified,
                      obj.etag, obj.size, obj.content_type)
        except S3Error as e:
            print("[error]:", e)

    def bucket_policy(self, bucket_name):
        """
        Fetch a bucket's access policy.

        :param bucket_name: bucket name
        :return: policy JSON string, or None on error
        """
        try:
            policy = self.client.get_bucket_policy(bucket_name)
        except S3Error as e:
            print("[error]:", e)
            return None
        return policy

    def download_file(self, bucket_name, file, file_path, stream=1024 * 32):
        """
        Download an object and write it to a local file in chunks.

        :param bucket_name: bucket name
        :param file: object name
        :param file_path: local destination path
        :param stream: chunk size in bytes for streaming writes
        """
        response = None
        try:
            response = self.client.get_object(bucket_name, file)
            with open(file_path, "wb") as fp:
                for chunk in response.stream(stream):
                    fp.write(chunk)
        except S3Error as e:
            print("[error]:", e)
        finally:
            # get_object returns a urllib3 response that must be closed and
            # have its connection released, or the pool slot leaks.
            if response is not None:
                response.close()
                response.release_conn()

    def fget_file(self, bucket_name, file, file_path):
        """
        Download an object straight to a local file via the SDK.

        :param bucket_name: bucket name
        :param file: object name
        :param file_path: local destination path
        """
        self.client.fget_object(bucket_name, file, file_path)

    def get_section_data(self, bucket_name, file_name, start, size):
        """
        Fetch one slice of an object (worker for the download thread pool).

        :param bucket_name: bucket name
        :param file_name: object name
        :param start: byte offset of the slice
        :param size: slice length in bytes
        :return: {'start': offset, 'data': response or None on failure}
        """
        data = {'start': start, 'data': None}
        try:
            obj = self.client.get_object(bucket_name=bucket_name, object_name=file_name, offset=start, length=size)
            data['data'] = obj
        except Exception as e:
            print('=============', e)
        return data

    def get_file_object(self, bucket_name, object_name):
        """
        Download a whole object concurrently, slice by slice.

        :param bucket_name: bucket name
        :param object_name: object name
        :return: object content as bytes (slices that failed leave a hole)
        """
        futures = []
        file_data = io.BytesIO()
        try:
            stat_obj = self.client.stat_object(bucket_name=bucket_name, object_name=object_name)
            total_length = stat_obj.size
            size = self.size
            total_page = self.get_page_count(total_length, size)

            for page in range(total_page):
                start = page * size
                # BUGFIX: the last slice must request only the remaining
                # bytes.  The old code kept an accumulator `total` that was
                # never updated, so the final slice asked for the full
                # object length again.
                length = min(size, total_length - start)
                futures.append(
                    self.processPool.submit(self.get_section_data, bucket_name, object_name, start, length)
                )

            for thread_res in tqdm(as_completed(futures), unit='MB', unit_scale=True,
                                   unit_divisor=1024 * 1024, ascii=True, total=len(futures), ncols=50):
                try:
                    _res = thread_res.result()
                    response = _res['data']
                    if response is None:
                        continue  # this slice failed; skip instead of crashing on None.read()
                    file_data.seek(_res['start'])
                    file_data.write(response.read())
                    # Release each slice's pooled HTTP connection.
                    response.close()
                    response.release_conn()
                except Exception as e:
                    print(e)

        except Exception as e:
            print(e)

        return file_data.getvalue()

    def get_object_list(self, bucket_name):
        """
        List objects in a bucket.

        :param bucket_name: bucket name
        :return: object iterator, or an empty list on error
        """
        objects = []
        try:
            objects = self.client.list_objects(bucket_name)
        except Exception as e:
            print(e)
        return objects

    def get_page_count(self, total, per_page):
        """
        Ceiling division: number of pages needed for ``total`` records.

        :param total: total record count
        :param per_page: records per page
        :return: page count
        """
        # -(-a // b) is ceiling division without importing math.
        return -(-total // per_page)

    def copy_file(self, bucket_name, file, file_path):
        """
        Server-side copy of an object (SDK limit: 5 GB).

        NOTE(review): with minio>=7 the third argument must be a
        ``CopySource`` object, not a plain path string — confirm against
        the installed SDK version before relying on this method.

        :param bucket_name: destination bucket
        :param file: destination object name
        :param file_path: copy source
        """
        self.client.copy_object(bucket_name, file, file_path)

    def upload_file(self, bucket_name, file, file_path, content_type):
        """
        Upload a local file via ``put_object``.

        :param bucket_name: bucket name (created if missing)
        :param file: object name
        :param file_path: local file path
        :param content_type: MIME type of the object
        """
        try:
            # Make bucket if it does not exist.
            if not self.client.bucket_exists(bucket_name):
                print("Bucket '{}' is not exists".format(bucket_name))
                self.client.make_bucket(bucket_name)

            with open(file_path, "rb") as file_data:
                file_stat = os.stat(file_path)
                self.client.put_object(bucket_name, file, file_data, file_stat.st_size, content_type=content_type)
        except S3Error as e:
            print("[error]:", e)

    def upload_object(self, bucket_name, file, file_data, content_type='binary/octet-stream'):
        """
        Upload in-memory bytes as an object.

        :param bucket_name: bucket name (created if missing)
        :param file: object name
        :param file_data: raw bytes to upload
        :param content_type: MIME type (default kept for backward
            compatibility; the conventional value would be
            "application/octet-stream")
        """
        try:
            # Make bucket if it does not exist.
            if not self.client.bucket_exists(bucket_name):
                print("Bucket '{}' is not exists".format(bucket_name))
                self.client.make_bucket(bucket_name)

            buffer = io.BytesIO(file_data)
            self.client.put_object(bucket_name, file, buffer, len(file_data), content_type=content_type)
        except S3Error as e:
            print("[error]:", e)

    def fput_file(self, bucket_name, file, file_path):
        """
        Upload a local file via ``fput_object``.

        :param bucket_name: bucket name (created if missing)
        :param file: object name
        :param file_path: local file path
        """
        try:
            # Make bucket if it does not exist.
            if not self.client.bucket_exists(bucket_name):
                self.client.make_bucket(bucket_name)
            else:
                print("Bucket '{}' already exists".format(bucket_name))
            self.client.fput_object(bucket_name, file, file_path)
        except S3Error as e:
            print("[error]:", e)

    def stat_object(self, bucket_name, file, log=True):
        """
        Fetch an object's metadata.

        :param bucket_name: bucket name
        :param file: object name
        :param log: print the metadata fields (and errors) when True
        :return: stat result, or None on error
        """
        res = None
        try:
            res = self.client.stat_object(bucket_name, file)
            if log:
                print(res.bucket_name)
                print(res.object_name)
                print(res.last_modified)
                print(res.etag)
                print(res.size)
                print(res.metadata)
                print(res.content_type)
        except S3Error as e:
            if log:
                print("[error]:", e)
        return res

    def remove_file(self, bucket_name, file):
        """
        Delete a single object.

        :param bucket_name: bucket name
        :param file: object name
        """
        self.client.remove_object(bucket_name, file)

    def remove_files(self, bucket_name, file_list):
        """
        Delete multiple objects, printing any per-object errors.

        :param bucket_name: bucket name
        :param file_list: iterable of object names
        """
        delete_object_list = [DeleteObject(name) for name in file_list]
        # remove_objects is lazy: the returned error iterator must be
        # consumed for the deletions to actually execute.
        for del_err in self.client.remove_objects(bucket_name, delete_object_list):
            print("del_err", del_err)

    def presigned_get_file(self, bucket_name, file, days=7):
        """
        Build a presigned HTTP GET URL for an object.

        :param bucket_name: bucket name
        :param file: object name
        :param days: URL validity in days (S3-compatible max is 7)
        :return: presigned URL string
        """
        return self.client.presigned_get_object(bucket_name, file, expires=timedelta(days=days))
相关推荐
databook23 分钟前
Manim实现闪光轨迹特效
后端·python·动效
Juchecar2 小时前
解惑:NumPy 中 ndarray.ndim 到底是什么?
python
用户8356290780512 小时前
Python 删除 Excel 工作表中的空白行列
后端·python
Json_2 小时前
使用python-fastApi框架开发一个学校宿舍管理系统-前后端分离项目
后端·python·fastapi
数据智能老司机9 小时前
精通 Python 设计模式——分布式系统模式
python·设计模式·架构
数据智能老司机10 小时前
精通 Python 设计模式——并发与异步模式
python·设计模式·编程语言
数据智能老司机10 小时前
精通 Python 设计模式——测试模式
python·设计模式·架构
数据智能老司机10 小时前
精通 Python 设计模式——性能模式
python·设计模式·架构
c8i10 小时前
drf初步梳理
python·django
每日AI新事件10 小时前
python的异步函数
python