Python处理文档

txt文件

读取

python 复制代码
# Method 1: read() pulls the entire file into memory in one call
def read_txt_basic(filepath) -> str:
    """Return the complete contents of a UTF-8 text file.

    Args:
        filepath: Path of the file to read.

    Returns:
        The whole file as a single string.
    """
    with open(filepath, mode='r', encoding='utf-8') as src:
        return src.read()


# Method 2: collect every line of the file into a list
def read_txt_line(filepath) -> list[str]:
    """Return all lines of a UTF-8 text file as a list.

    Each element keeps its trailing newline, if the line had one.

    Args:
        filepath: Path of the file to read.

    Returns:
        A list with one string per line, in file order.
    """
    with open(filepath, mode='r', encoding='utf-8') as src:
        return list(src)

# Method 3: stream the file line by line (suited to large files), as a generator
def read_txt_line_by_line(filepath):
    """Lazily yield the cleaned, non-empty lines of a UTF-8 text file.

    Every newline and tab character is removed from a line (including tabs
    in the middle of it), then surrounding whitespace is stripped. Lines
    that end up empty are skipped.

    Args:
        filepath: Path of the file to read.

    Yields:
        Each cleaned, non-empty line in file order.
    """
    # One translation table deletes both characters in a single pass.
    drop_newline_tab = str.maketrans('', '', '\n\t')
    with open(filepath, mode='r', encoding='utf-8') as src:
        for raw in src:
            cleaned = raw.translate(drop_newline_tab).strip()
            if cleaned:
                yield cleaned

# Method 4: guard the read with exception handling
def read_txt_file_exception(filepath)->str:
    """Read a text file, tolerating a missing file or a non-UTF-8 encoding.

    Args:
        filepath: Path of the file to read.

    Returns:
        The file contents decoded as UTF-8. If the file does not exist, a
        message is printed and an empty string is returned. If UTF-8
        decoding fails, the file is re-read as GBK with undecodable bytes
        dropped.
    """
    try:
        with open(filepath, encoding='utf-8') as src:
            text = src.read()
    except FileNotFoundError:
        print(f'文件{filepath}不存在')
        text = ''
    except UnicodeDecodeError:
        # Second attempt: GBK, silently ignoring bytes that still do not decode.
        with open(filepath, encoding='gbk', errors='ignore') as fallback:
            text = fallback.read()
    return text

# 高级:自动检测编码
import chardet
def read_txt_encoding_detection(filepath)->str:
    """Read a text file using the encoding guessed by chardet.

    The file is first read as raw bytes so chardet can guess its encoding,
    then reopened in text mode with that encoding.

    Args:
        filepath: Path of the file to read.

    Returns:
        The decoded file contents.

    Raises:
        FileNotFoundError: If the file does not exist.
        UnicodeDecodeError: If the guessed encoding cannot decode the file.
    """
    with open(filepath, 'rb') as file:
        raw_data = file.read()

    # chardet reports an encoding of None when it cannot make a guess
    # (e.g. for empty input). Passing None to open() would silently use the
    # platform locale encoding, so fall back to UTF-8 explicitly, matching
    # the default used by the other readers in this file.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    with open(filepath, 'r', encoding=encoding) as file:
        return file.read()

写入

python 复制代码
# Method 1: write() stores a single string
def write_txt_basic(content, filepath):
    """Write a string to a file as UTF-8, replacing any existing contents.

    Args:
        content: Text to write.
        filepath: Destination path; created if missing, truncated otherwise.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        sink.write(content)


# Method 2: writelines() stores several lines
def write_txt_lines(lines, filepath):
    """Write strings to a file as UTF-8, one per line, replacing old contents.

    A newline is appended to every entry that does not already end with one.

    Args:
        lines: Iterable of strings to write.
        filepath: Destination path; created if missing, truncated otherwise.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        terminated = []
        for text in lines:
            # Only add a terminator where one is missing.
            if text.endswith('\n'):
                terminated.append(text)
            else:
                terminated.append(text + '\n')
        sink.writelines(terminated)


# Read multiple lines through a generator
def read_txt_lines(filepath):
    """Print the first line of a UTF-8 text file, then yield the remaining lines.

    Args:
        filepath: Path of the file to read.

    Yields:
        Every line after the first, unmodified (trailing newlines kept).
    """
    with open(filepath, mode='r', encoding='utf-8') as src:
        # The first line is printed rather than yielded. The original note
        # says this is to skip the "file name" output, so presumably the
        # first line holds a file name; verify against the input data.
        first_line = src.readline()
        print(first_line)
        yield from src

# Method 3: append to the end of the file
def write_txt_append(content,filepath):
    """Append a string plus a trailing newline to a UTF-8 text file.

    Args:
        content: Text to append; a newline is always added after it.
        filepath: Destination path; created if it does not exist.
    """
    with open(filepath, mode='a', encoding='utf-8') as sink:
        record = content + '\n'
        sink.write(record)

# Method 4: buffered batch writing
def write_txt_efficient(datalist,filepath,batch_size=1000):
    """Write cleaned items to a UTF-8 file, one per line, in batches.

    Each item is converted to text, has every newline and tab removed, and
    is stripped of surrounding whitespace. Items that end up empty are
    skipped. Lines are collected in a buffer and flushed to the file once
    the buffer holds ``batch_size`` lines.

    Args:
        datalist: Iterable of items to write; non-string items are
            converted with ``str()``.
        filepath: Destination path; created if missing, truncated otherwise.
        batch_size: Number of buffered lines that triggers a write. Values
            below 1 cause a write after every kept line.
    """
    with open(filepath, 'w', encoding='utf-8') as file:
        buffer = []
        for item in datalist:
            # Stringify before cleaning so non-string items do not fail on
            # .replace().
            cleaned = str(item).replace('\n', '').replace('\t', '').strip()
            if not cleaned:
                continue
            buffer.append(cleaned + '\n')
            # Flush on the buffer's real size, not the input index: skipped
            # items must not postpone or suppress a flush, and a batch_size
            # of 0 must not divide by zero.
            if len(buffer) >= batch_size:
                file.writelines(buffer)
                buffer = []
        # Write whatever is left over from the last partial batch.
        if buffer:
            file.writelines(buffer)
相关推荐
Json____1 小时前
前端入门练习题集-HTML/CSS/JS实战小项目15个
前端·css·html
cici158741 小时前
MATLAB GUI构建一个AIS自动船舶系统
开发语言·matlab
一氧化二氢.h1 小时前
【java】的数组列表和集合的区别是什么
java·开发语言
Shan12051 小时前
实例分析:重载自定义参数的new
开发语言·c++
2401_898717661 小时前
HTML5中SVG原生动画标签Animate的基础用法
jvm·数据库·python
科研小白_1 小时前
【第二期:MATLAB点云处理基础】KD树与点云邻域搜索
java·前端·人工智能
小江的记录本1 小时前
【MySQL】《MySQL基础架构 面试核心考点问答清单》
前端·数据库·后端·sql·mysql·adb·面试
爱网络爱Linux1 小时前
华为HCIP——BGP 基础配置
服务器·前端·华为·hcip·hcip datacom·华为数通认证
IT策士1 小时前
Python PPT操作:从入门到精通
开发语言·python·powerpoint