Python处理文档

txt文件

读取

python 复制代码
# 方法1:read() 一次性读取整个文件
def read_txt_basic(filepath) -> str:
    """Return the whole content of a UTF-8 text file as a single string.

    Args:
        filepath: Path of the file to read.

    Returns:
        The decoded text of the entire file.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        return handle.read()


# 方法2:readlines() 按行读取
def read_txt_line(filepath) -> list[str]:
    """Return every line of a UTF-8 text file as a list.

    Args:
        filepath: Path of the file to read.

    Returns:
        The lines in file order; each keeps its trailing newline if it had one.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        # Iterating a text file yields the same lines readlines() would return.
        return list(handle)

# 方法3:逐行读取(适合大文件),使用迭代器方法
def read_txt_line_by_line(filepath):
    """Lazily yield the cleaned, non-empty lines of a UTF-8 text file.

    Every newline and tab character is removed from a line (including tabs
    in the middle of the line), then surrounding whitespace is stripped.
    Lines that end up empty are skipped.

    Args:
        filepath: Path of the file to read.

    Yields:
        Each cleaned, non-empty line, in file order.
    """
    # Translation table that deletes '\n' and '\t' in a single pass.
    strip_table = {ord('\n'): None, ord('\t'): None}
    with open(filepath, mode='r', encoding='utf-8') as handle:
        for raw in handle:
            cleaned = raw.translate(strip_table).strip()
            if cleaned:
                yield cleaned

# 方法4:使用异常处理(exception)保证读取的健壮性
def read_txt_file_exception(filepath)->str:
    """Read a text file as UTF-8, with fallbacks for common failures.

    Args:
        filepath: Path of the file to read.

    Returns:
        The file's text. If the file does not exist, a message is printed
        and an empty string is returned. If the content is not valid UTF-8,
        the file is re-read as GBK with undecodable bytes ignored.
    """
    try:
        with open(filepath, mode='r', encoding='utf-8') as handle:
            text = handle.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: retry as GBK and drop any bytes that still fail.
        with open(filepath, mode='r', encoding='gbk', errors='ignore') as handle:
            text = handle.read()
    except FileNotFoundError:
        print(f'文件{filepath}不存在')
        text = ''
    return text

# 高级:自动检测编码
import chardet
def read_txt_encoding_detection(filepath)->str:
    """Read a text file after guessing its encoding from the raw bytes.

    The file is first read in binary mode and passed to ``chardet.detect``;
    it is then reopened in text mode with the detected encoding.

    Args:
        filepath: Path of the file to read.

    Returns:
        The decoded text of the file.

    Raises:
        UnicodeDecodeError: If the content cannot be decoded with the
            detected (or fallback) encoding.
    """
    with open(filepath, 'rb') as file:
        raw_data = file.read()

    # chardet reports an encoding of None when it cannot make a guess (for
    # example on an empty file). Passing None to open() would silently use
    # the platform's locale encoding, so fall back to UTF-8 explicitly.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    with open(filepath, 'r', encoding=encoding) as file:
        return file.read()

写入

python 复制代码
# 方法1:write() 写入
def write_txt_basic(content, filepath):
    """Write a string to a file as UTF-8, replacing any existing content.

    Args:
        content: Text to write.
        filepath: Path of the destination file; created if missing,
            truncated otherwise.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        sink.write(content)


# 方法2:writelines() 写入多行
def write_txt_lines(lines, filepath):
    """Write several lines to a UTF-8 file, one entry per line.

    Any entry that does not already end with a newline gets one appended.
    Existing file content is replaced.

    Args:
        lines: Iterable of strings to write.
        filepath: Path of the destination file.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        # Build the full list first so nothing is written if an entry is bad.
        terminated = []
        for entry in lines:
            if entry.endswith('\n'):
                terminated.append(entry)
            else:
                terminated.append(entry + '\n')
        sink.writelines(terminated)


# 多行读入,使用迭代器
def read_txt_lines(filepath):
    """Lazily yield the lines of a UTF-8 text file, excluding the first one.

    The first line is consumed and printed to standard output rather than
    yielded; every following line is yielded unchanged, including its
    trailing newline.

    Args:
        filepath: Path of the file to read.

    Yields:
        Each line after the first, in file order.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        # Consume the first line (the file-name line) so it is not yielded.
        header = handle.readline()
        print(header)
        for remaining in handle:
            yield remaining

# 方法3:追加写入
def write_txt_append(content,filepath):
    """Append a string plus a trailing newline to a UTF-8 text file.

    Args:
        content: Text to append; a newline is always added after it.
        filepath: Path of the destination file; created if it does not exist.
    """
    with open(filepath, mode='a', encoding='utf-8') as sink:
        record = content + '\n'
        sink.write(record)

# 方法4:高性能批量写入
def write_txt_efficient(datalist,filepath,batch_size=1000):
    """Write cleaned, non-empty items to a UTF-8 text file in batches.

    Each item is converted to text, has every newline and tab character
    removed, and is stripped of surrounding whitespace. Items that are empty
    after cleaning are skipped; the rest are written one per line. Existing
    file content is replaced.

    Args:
        datalist: Iterable of items to write.
        filepath: Path of the destination file.
        batch_size: Maximum number of pending lines kept in memory before
            they are written out. Values below 1 cause a write after every
            line.
    """
    with open(filepath, 'w', encoding='utf-8') as file:
        buffer = []
        for item in datalist:
            # Convert before cleaning so non-string items (e.g. numbers) work.
            cleanstr = str(item).replace('\n', '').replace('\t', '').strip()
            if not cleanstr:
                continue
            buffer.append(cleanstr + '\n')
            # Flush on the buffer's own size rather than the item index:
            # skipped items must not postpone a flush, and a batch_size of 0
            # must not trigger a division by zero.
            if len(buffer) >= batch_size:
                file.writelines(buffer)
                buffer = []
        # Write whatever is left over from the last, partial batch.
        if buffer:
            file.writelines(buffer)
相关推荐
NiceCloud喜云34 分钟前
Opus 4.8 的 Effort Control 怎么选:Low 到 Max 五档策略
android·java·大数据·前端·c++·python·spring
wordbaby1 小时前
React Native + RNOH:跨页面数据回传的最佳实践与避坑指南
前端·react native
丷丩1 小时前
MapLibre GL JS第22课:查看本地GeoJSON
前端·javascript·map·mapbox·maplibre gl js
AI玫瑰助手1 小时前
Python函数:默认参数的定义与注意事项
开发语言·python·信息可视化
weixin_468466851 小时前
全局与局部注意力机制新手实战指南
人工智能·python·深度学习·算法·自然语言处理·transformer·注意力机制
油炸自行车1 小时前
Claude Code 错误:API Error: 400 Failed to deserialize the JSON body into the
开发语言·javascript·json·trae·claude code·api error 400
肩上风骋1 小时前
C++14特性
开发语言·c++·c++14特性
小糖学代码2 小时前
LLM系列:环境搭建:5.Python-dotenv 环境变量管理
人工智能·python·深度学习·神经网络
智慧物业老杨2 小时前
智慧物业合同周期管理系统:从风险预警到智能交接的全流程数智化落地方案
java·人工智能·python
Front思2 小时前
AI前端工程师需要具备能力+
前端·人工智能·ai