Python处理文档

txt文件

读取

python
# 方法1:read() 一次性读取整个文件
def read_txt_basic(filepath) -> str:
    """Read a whole UTF-8 text file in a single call.

    Args:
        filepath: Path of the file to open.

    Returns:
        The complete decoded contents of the file.
    """
    with open(filepath, encoding='utf-8') as fh:
        return fh.read()


# 方法2:readlines() 按行读取
def read_txt_line(filepath) -> list[str]:
    """Read a UTF-8 text file and return all of its lines.

    Args:
        filepath: Path of the file to open.

    Returns:
        One string per line, each keeping its trailing newline if the
        file had one.
    """
    with open(filepath, encoding='utf-8') as fh:
        return fh.readlines()

# 方法3:逐行读取(适合大文件),使用迭代器方法
def read_txt_line_by_line(filepath):
    """Lazily yield the cleaned, non-empty lines of a UTF-8 text file.

    Every newline and tab character is removed from a line (tabs inside
    the line included) and the result is stripped of surrounding
    whitespace. Lines that end up empty are skipped.

    Args:
        filepath: Path of the file to open.

    Yields:
        Each cleaned line that still has content.
    """
    with open(filepath, encoding='utf-8') as fh:
        for raw in fh:
            without_newlines = raw.replace('\n', '')
            without_tabs = without_newlines.replace('\t', '')
            cleaned = without_tabs.strip()
            if cleaned == '':
                continue
            yield cleaned

# 方法4:使用exception做好保证
def read_txt_file_exception(filepath)->str:
    """Read a text file, tolerating a missing file or non-UTF-8 content.

    The file is first read as UTF-8. If it does not exist, a message is
    printed and an empty string is returned. If UTF-8 decoding fails,
    the file is reopened as GBK with undecodable bytes ignored.

    Args:
        filepath: Path of the file to open.

    Returns:
        The decoded file contents, or '' when the file is missing.
    """
    try:
        with open(filepath, encoding='utf-8') as fh:
            text = fh.read()
    except FileNotFoundError:
        print(f'文件{filepath}不存在')
        text = ''
    except UnicodeDecodeError:
        # Second attempt: GBK, dropping any bytes that still fail to decode.
        with open(filepath, encoding='gbk', errors='ignore') as fh:
            text = fh.read()
    return text

# 高级:自动检测编码
import chardet
def read_txt_encoding_detection(filepath)->str:
    """Read a text file using the encoding that chardet detects for it.

    The file is read once as raw bytes for detection and then reopened
    in text mode with the detected encoding.

    Args:
        filepath: Path of the file to open.

    Returns:
        The decoded contents of the file.
    """
    with open(filepath, 'rb') as file:
        raw_data = file.read()

    # chardet reports an encoding of None when it cannot make a guess
    # (for example on an empty file). Passing None to open() would silently
    # select the platform's locale encoding, so fall back to UTF-8 instead.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    with open(filepath, 'r', encoding=encoding) as file:
        return file.read()

写入

python
# 方法1:write() 写入
def write_txt_basic(content, filepath):
    """Write a string to a file as UTF-8, replacing any existing content.

    Args:
        content: Text to write.
        filepath: Path of the file to create or overwrite.
    """
    with open(filepath, mode='w', encoding='utf-8') as out:
        out.write(content)


# 方法2:writelines() 写入多行
def write_txt_lines(lines, filepath):
    """Write several lines to a UTF-8 file, one per line.

    Any entry that does not already end with a newline gets one
    appended; entries that do are written unchanged. Existing file
    content is replaced.

    Args:
        lines: Iterable of strings to write.
        filepath: Path of the file to create or overwrite.
    """
    with open(filepath, mode='w', encoding='utf-8') as out:
        # Make sure every entry is newline-terminated before writing.
        terminated = [
            entry if entry.endswith('\n') else entry + '\n'
            for entry in lines
        ]
        out.writelines(terminated)


# 多行读入,使用迭代器
def read_txt_lines(filepath):
    """Print the first line of a UTF-8 file, then yield the remaining lines.

    Args:
        filepath: Path of the file to open.

    Yields:
        Every line after the first, unchanged (trailing newlines kept).
    """
    with open(filepath, encoding='utf-8') as fh:
        # The first line is consumed and printed rather than yielded.
        # NOTE(review): the original comment reads "skip the filename
        # output" - presumably the first line is a filename header; confirm.
        first_line = fh.readline()
        print(first_line)
        for remaining in fh:
            yield remaining

# 方法3:追加写入
def write_txt_append(content,filepath):
    """Append a string plus a trailing newline to a UTF-8 file.

    The file is created if it does not exist; existing content is kept.

    Args:
        content: Text to append (a newline is added after it).
        filepath: Path of the file to append to.
    """
    with open(filepath, mode='a', encoding='utf-8') as out:
        line = content + '\n'
        out.write(line)

# 方法4:高性能批量写入
def write_txt_efficient(datalist,filepath,batch_size=1000):
    """Write cleaned items to a UTF-8 file in batches, one item per line.

    Each item is converted to a string, has all newline and tab
    characters removed, and is stripped of surrounding whitespace. Items
    that are empty after cleaning are skipped. Existing file content is
    replaced.

    Args:
        datalist: Iterable of items to write; non-string items are
            converted with str().
        filepath: Path of the file to create or overwrite.
        batch_size: Number of cleaned lines to collect before each write.
            Values below 1 cause a write after every line.
    """
    with open(filepath,'w',encoding='utf-8') as file:
        buffer=[]
        for item in datalist:
            # Convert first so non-string items can be cleaned as well.
            cleanstr = str(item).replace('\n', '').replace('\t', '').strip()
            if not cleanstr:
                continue
            buffer.append(cleanstr + '\n')
            # Flush on the number of buffered lines, not the item index, so
            # skipped blank items can never postpone a flush and the buffer
            # stays bounded by batch_size.
            if len(buffer) >= batch_size:
                file.writelines(buffer)
                buffer=[]
        # Write whatever is left over from the last partial batch.
        if buffer:
            file.writelines(buffer)
相关推荐
IT_陈寒1 小时前
React的useState居然还有这种坑?我差点删库跑路
前端·人工智能·后端
Pedantic2 小时前
SwiftUI 手势笔记
前端·后端
金銀銅鐵2 小时前
[Python] 从《千字文》中随机挑选汉字
后端·python
橙子家3 小时前
浏览器缓存之【结构化数据库与缓存】: IndexedDB、Cache storage 和 Storage buckets
前端
user20585561518133 小时前
X6 中边悬浮置顶,规避 `mouseleave` 事件丢失问题
前端
李明卫杭州3 小时前
CSS aspect-ratio 属性完全指南
前端
Pedantic5 小时前
SwiftUI 手势层级(Gesture Hierarchy)详解
前端
飘尘5 小时前
前端转型全栈(Java后端)的快速上手指引
前端·后端·全栈
一颗烂土豆5 小时前
Meshopt 压缩深度解析,为什么它比 Draco 更快
前端·javascript·webgl