Python处理文档

txt文件

读取

python 复制代码

# 方法1：read() 一次性读取整个文件
def read_txt_basic(filepath) -> str:
    """Return the whole content of a UTF-8 text file as a single string.

    Args:
        filepath: Location of the file to read.

    Returns:
        The decoded text of the file, line endings included.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        return handle.read()


# 方法2：readlines() 一次性读取所有行，返回行列表
def read_txt_line(filepath) -> list[str]:
    """Load every line of a UTF-8 text file into a list.

    Args:
        filepath: Location of the file to read.

    Returns:
        The file's lines in order; each keeps its trailing newline if the
        file had one at that position.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        # Exhausting the file iterator yields the same lines as readlines().
        return list(handle)

# 方法3：逐行读取（适合大文件），使用迭代器方法
def read_txt_line_by_line(filepath):
    """Lazily yield the non-blank, cleaned lines of a UTF-8 text file.

    Each line has every newline and tab character removed (including tabs
    in the middle of the line) and is then stripped of surrounding
    whitespace. Lines that end up empty are skipped.

    Args:
        filepath: Location of the file to read.

    Yields:
        The cleaned text of each non-blank line.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        # Deletion table: drops '\n' and '\t' in a single pass per line.
        drop_newline_and_tab = str.maketrans('', '', '\n\t')
        for raw_line in handle:
            cleaned = raw_line.translate(drop_newline_and_tab).strip()
            if cleaned:
                yield cleaned

# 方法4：使用异常处理提高健壮性（处理文件不存在和编码错误）
def read_txt_file_exception(filepath) -> str:
    """Read a text file, tolerating a missing file or non-UTF-8 content.

    The file is first decoded as UTF-8. If that decoding fails, it is read
    again as GBK with undecodable bytes ignored. If the file does not
    exist, a message is printed and an empty string is returned.

    Args:
        filepath: Location of the file to read.

    Returns:
        The decoded text, or '' when the file is missing.
    """
    try:
        with open(filepath, mode='r', encoding='utf-8') as handle:
            text = handle.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: retry as GBK and drop bytes that still cannot
        # be decoded instead of failing.
        with open(filepath, mode='r', encoding='gbk', errors='ignore') as fallback:
            text = fallback.read()
    except FileNotFoundError:
        print(f'文件{filepath}不存在')
        text = ''
    return text

# 高级：自动检测编码
import chardet
def read_txt_encoding_detection(filepath) -> str:
    """Read a text file using the encoding guessed by ``chardet``.

    The file is read once as raw bytes so ``chardet.detect`` can guess the
    encoding, then reopened in text mode with that encoding.

    Args:
        filepath: Location of the file to read.

    Returns:
        The decoded text of the file.

    Raises:
        FileNotFoundError: If the file does not exist.
        UnicodeDecodeError: If the content cannot be decoded with the
            detected (or fallback) encoding.
    """
    with open(filepath, 'rb') as file:
        raw_data = file.read()

    # chardet reports an encoding of None when it cannot make a guess
    # (for example on an empty file). Passing None to open() would silently
    # select the platform's locale encoding, so fall back to UTF-8 to keep
    # the result the same on every machine.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    with open(filepath, 'r', encoding=encoding) as file:
        return file.read()

写入

python 复制代码

# 方法1：write() 写入
def write_txt_basic(content, filepath):
    """Write a string to a file as UTF-8, replacing any existing content.

    Args:
        content: Text to write.
        filepath: Destination file; created if missing, truncated otherwise.
    """
    with open(filepath, mode='w', encoding='utf-8') as out:
        out.write(content)


# 方法2：writelines() 写入多行
def write_txt_lines(lines, filepath):
    """Write several lines to a UTF-8 file, one entry per line.

    Existing file content is replaced. An entry that does not already end
    with a newline gets one appended; entries that do are written as-is.

    Args:
        lines: Iterable of strings to write.
        filepath: Destination file; created if missing, truncated otherwise.
    """
    with open(filepath, mode='w', encoding='utf-8') as out:
        terminated = []
        for entry in lines:
            # Only add a newline when the entry lacks one, to avoid
            # doubling line breaks.
            if entry.endswith('\n'):
                terminated.append(entry)
            else:
                terminated.append(entry + '\n')
        out.writelines(terminated)


# 多行读入，使用迭代器
def read_txt_lines(filepath):
    """Lazily yield the lines of a UTF-8 text file, skipping the first one.

    The first line is consumed and printed to stdout instead of being
    yielded. All remaining lines are yielded unchanged, trailing newlines
    included.

    Args:
        filepath: Location of the file to read.

    Yields:
        Each line after the first, exactly as read.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        # The first line is echoed rather than yielded; the original note
        # suggests it holds a file name -- confirm against the data files.
        first_line = handle.readline()
        print(first_line)
        yield from handle

# 方法3：追加写入
def write_txt_append(content, filepath):
    """Append a string plus a trailing newline to a UTF-8 text file.

    Args:
        content: Text to append; a newline is always added after it.
        filepath: Destination file; created if it does not exist.
    """
    with open(filepath, mode='a', encoding='utf-8') as out:
        record = content + '\n'
        out.write(record)

# 方法4：高性能批量写入
def write_txt_efficient(datalist, filepath, batch_size=1000):
    """Write cleaned, non-blank items to a UTF-8 file in buffered batches.

    Each item is converted to ``str``, has every newline and tab character
    removed (including ones in the middle of the text), and is stripped of
    surrounding whitespace. Items that end up empty are skipped. The
    remaining items are written one per line, replacing any existing file
    content.

    Args:
        datalist: Iterable of items to write; non-string items are
            converted with ``str()``.
        filepath: Destination file; created if missing, truncated otherwise.
        batch_size: Number of buffered lines that triggers a flush to the
            file. Values below 1 flush after every line.
    """
    with open(filepath, 'w', encoding='utf-8') as file:
        buffer = []
        for item in datalist:
            # Convert before cleaning so non-string items do not fail on
            # .replace().
            cleaned = str(item).replace('\n', '').replace('\t', '').strip()
            if not cleaned:
                continue
            buffer.append(cleaned + '\n')
            # Flush on the number of lines actually buffered, not on the
            # input index: skipped blank items must not shift or suppress
            # a flush.
            if len(buffer) >= batch_size:
                file.writelines(buffer)
                buffer = []
        # Write whatever is left over from the final, partial batch.
        if buffer:
            file.writelines(buffer)