把数据分割的代码做成了一个小程序:第一次分割按照数字,第二次分割按照空格和汉字。
python
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import re
# 拆分字符串的函数(只拆分一次)
def split_once_by_zero(text) -> list:
    """Split *text* at its first ``'0'`` character, at most once.

    Args:
        text: The cell value to split. Anything that is not a ``str``
            (numbers, ``None``, NaN, ...) is treated as "no data".

    Returns:
        A two-element list ``[head, tail]``:

        * ``[before, after]`` when *text* contains ``'0'``; the ``'0'``
          itself is dropped and either side may be an empty string.
        * ``[text, None]`` when *text* contains no ``'0'``.
        * ``[None, None]`` when *text* is not a string.
    """
    # NOTE(review): the description accompanying this script says the first
    # split is "by digit", but only the literal character '0' is used as the
    # separator here -- confirm that this is the intended rule.
    if not isinstance(text, str):
        return [None, None]

    head, separator, tail = text.partition('0')
    # An empty separator means '0' never occurred in the text.
    if not separator:
        return [head, None]
    return [head, tail]
# 使用正则表达式拆分字符串的函数
def split_by_regex(text, pattern: str) -> list:
    """Split *text* with ``re.split`` and return the meaningful fragments.

    When *pattern* contains a capturing group, ``re.split`` also returns the
    captured separators, so both the matched runs and the text between them
    end up in the result.

    Args:
        text: The string to split. ``None``, NaN and any other non-string
            value are treated as "no data".
        pattern: Regular expression passed to ``re.split``.

    Returns:
        A list of fragments with surrounding whitespace removed. Empty and
        whitespace-only fragments are discarded. The list always has at
        least two entries so callers can index ``[0]`` and ``[1]`` safely:
        ``[None, None]`` when nothing usable remains, and ``[piece, None]``
        when there is a single fragment.
    """
    # pandas may hand over NaN (a float) instead of None for missing cells;
    # re.split would raise TypeError on it.
    if not isinstance(text, str):
        return [None, None]

    # Whitespace-only fragments must be dropped as well: otherwise an input
    # such as "0123 456" yields ' ' as its second piece and the real second
    # field is pushed out of reach of the caller.
    fragments = [
        piece.strip()
        for piece in re.split(pattern, text)
        if piece and piece.strip()
    ]

    if not fragments:
        return [None, None]
    if len(fragments) == 1:
        return [fragments[0], None]
    return fragments
# 主拆分逻辑函数
def split_column_data(input_file, column_index, output_file):
    """Split one column of an Excel sheet into three fields and save them.

    Each cell of the chosen column is first split once at its first ``'0'``
    (``split_once_by_zero``). The part after that ``'0'`` gets the ``'0'``
    prepended again and is split a second time with a regular expression
    (``split_by_regex``). The result is written to *output_file* with the
    columns ``字段1``, ``字段2`` and ``字段3``.

    Success and every failure are reported to the user through a message
    box; no exception propagates to the caller.

    Args:
        input_file: Path of the Excel workbook to read.
        column_index: Zero-based position of the column to split.
        output_file: Path of the Excel workbook to write.
    """
    # Capturing group matching runs of characters that are neither CJK
    # ideographs, whitespace, nor the listed punctuation.
    pattern = r'([^\u4e00-\u9fa5\s!@#$%^&*()_+\-=\[\]{};\':"\\|,.<>\/?]+)'

    try:
        # openpyxl cannot read legacy .xls workbooks, which the file picker
        # also offers; let pandas choose the engine for those.
        engine = None if str(input_file).lower().endswith('.xls') else 'openpyxl'
        df = pd.read_excel(input_file, engine=engine)
        source_column = df.iloc[:, column_index]

        field1, field2, field3 = [], [], []
        for cell in source_column:
            head, tail = split_once_by_zero(cell)
            # Give back the '0' that the first split consumed.
            remainder = '0' + tail if tail is not None else None

            # The remainder is passed on directly instead of through a
            # temporary DataFrame column, where pandas may turn None into NaN.
            pieces = split_by_regex(remainder, pattern)

            field1.append(head)
            # Only the first two fragments are kept; any further fragments
            # are discarded, as before.
            field2.append(pieces[0] if len(pieces) > 0 else None)
            field3.append(pieces[1] if len(pieces) > 1 else None)

        split_df = pd.DataFrame(
            {'字段1': field1, '字段2': field2, '字段3': field3},
            index=df.index,
        )
        split_df.to_excel(output_file, index=False)
        messagebox.showinfo("成功", f"数据已保存到 {output_file}")
    except Exception as e:
        # Top-level GUI boundary: surface any failure to the user.
        messagebox.showerror("错误", str(e))
# 选择输入文件函数
def select_input_file(entry):
    """Ask the user for an Excel file and put its path into *entry*.

    Args:
        entry: The ``tk.Entry`` widget that displays the input path. It is
            left untouched when the dialog is cancelled.
    """
    file_path = filedialog.askopenfilename(
        filetypes=[("Excel files", "*.xlsx *.xls")]
    )
    if not file_path:
        return

    # Replace whatever path was shown before.
    entry.delete(0, tk.END)
    entry.insert(0, file_path)
# 选择输出文件函数
def select_output_file(entry):
    """Ask the user where to save the result and put the path into *entry*.

    The chosen path is given an ``.xlsx`` extension when it does not already
    end with one.

    Args:
        entry: The ``tk.Entry`` widget that displays the output path. It is
            left untouched when the dialog is cancelled.
    """
    default_extension = ".xlsx"
    output_file = filedialog.asksaveasfilename(
        defaultextension=default_extension,
        filetypes=[("Excel files", "*.xlsx")],
    )
    if not output_file:
        return

    # Compare case-insensitively so that "report.XLSX" is not turned into
    # "report.XLSX.xlsx".
    if not output_file.lower().endswith(default_extension):
        output_file += default_extension

    entry.delete(0, tk.END)
    entry.insert(0, output_file)
# GUI的主函数
def main():
    """Build the "Excel column splitter" window and run the Tk event loop."""
    root = tk.Tk()
    root.title("Excel 列拆分器")

    # Input file path
    tk.Label(root, text="输入文件:").grid(row=0, column=0, sticky="e")
    input_entry = tk.Entry(root)
    input_entry.grid(row=0, column=1, sticky="ew")
    tk.Button(
        root, text="浏览", command=lambda: select_input_file(input_entry)
    ).grid(row=0, column=2)

    # Column index
    tk.Label(root, text="列索引 (从0开始):").grid(row=1, column=0, sticky="e")
    column_entry = tk.Entry(root)
    column_entry.grid(row=1, column=1, sticky="ew")

    # Output file path
    tk.Label(root, text="输出文件:").grid(row=2, column=0, sticky="e")
    output_entry = tk.Entry(root)
    output_entry.grid(row=2, column=1, sticky="ew")
    tk.Button(
        root, text="保存为", command=lambda: select_output_file(output_entry)
    ).grid(row=2, column=2)

    def on_split():
        """Validate the column index, then run the split."""
        raw_index = column_entry.get().strip()
        try:
            column_index = int(raw_index)
        except ValueError:
            # Without this, a blank or non-numeric index raises inside the
            # Tk callback and the user gets no feedback in the window.
            messagebox.showerror("错误", f"列索引必须是整数: {raw_index!r}")
            return
        split_column_data(input_entry.get(), column_index, output_entry.get())

    # Split button
    tk.Button(root, text="拆分列", command=on_split).grid(
        row=3, column=1, sticky="ew"
    )

    root.mainloop()
# Start the GUI only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
有个 bug:这个程序不能完成第二次分割。