import pandas as pd
import re
import os
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
# ---------------------------------------------------------------------------
# Address splitter for the Mano temporary sheet.
#
# Reads 'Sheet1' of the workbook at `input_file_path`, parses the free-text
# address column ('地址栏') and the customer-name column ('客户名称') of every
# row, appends seven derived columns, and saves the table back to the same
# path with every cell formatted as text.
# ---------------------------------------------------------------------------

# Excel file to read; the processed table is saved back to this same path.
input_file_path = r"C:\Users\28952\Desktop\sop调货\临时拷贝\Mano临时表.xlsx"

# Patterns are compiled once here instead of once per row.
# Name / company: everything before the first comma (or the whole line,
# minus trailing whitespace, when there is no comma).
_NAME_COMPANY_RE = re.compile(r'^(.*?)(?=,\s*|\s*$)')
# Postal code: the first run of five digits.
_POSTAL_CODE_RE = re.compile(r'\d{5}')
# City: text between "<5 digits><space>" and "<country name> <country code>".
# `Deuts?chland` accepts the correct spelling as well as the original
# pattern's "Deutchland".
_CITY_RE = re.compile(
    r'(?<=\d{5}\s)(.*?)'
    r'(?=\s+(?:France FR|España ES|United Kingdom GB|Italia IT|Deuts?chland DE))'
)
# Country: a known country name followed by a two-letter upper-case code.
_COUNTRY_RE = re.compile(
    r'(France|España|United Kingdom|Italia|Deuts?chland)\s+[A-Z]{2}'
)


def parse_address_line(line):
    """Split one free-text address line into its parts.

    Args:
        line: Value of the '地址栏' cell. Anything that is not a ``str``
            (for example an empty cell) yields the blank result.

    Returns:
        A 5-tuple ``(name_company, address, postal_code, city, country)``.
        ``name_company``, ``address`` and ``postal_code`` are ``""`` when
        not found; ``city`` and ``country`` are ``None`` when not found.
    """
    if not isinstance(line, str):
        return "", "", "", None, None

    name_match = _NAME_COMPANY_RE.search(line)
    name_company = name_match.group(1) if name_match else ""

    postal_match = _POSTAL_CODE_RE.search(line)
    if postal_match:
        postal_code = postal_match.group(0)
        # The street part is the second comma-separated field, cut off at the
        # first occurrence of the postal code.
        after_name = line.split(',')[1].strip() if ',' in line else ""
        address = after_name.split(postal_code)[0].strip()
    else:
        # Without a postal code there is no anchor for the street part.
        postal_code = ""
        address = ""

    city_match = _CITY_RE.search(line)
    city = city_match.group(1) if city_match else None

    country_match = _COUNTRY_RE.search(line)
    country = country_match.group(1) if country_match else None

    return name_company, address, postal_code, city, country


def split_customer_name(customer_name):
    """Split a customer name into ``(first_name, last_name)``.

    Only names made of exactly two whitespace-separated words are split;
    any other value (including non-strings) yields ``("", "")``.
    """
    if isinstance(customer_name, str):
        parts = customer_name.split()
        if len(parts) == 2:
            return parts[0], parts[1]
    return "", ""


def add_parsed_columns(df):
    """Append the seven derived columns to *df* in place and return it.

    The address text is read from the '地址栏' column and the customer name
    from '客户名称'; a missing column is treated as an empty string for
    every row. The parsed street is written to a separate '地址' column.
    """
    names_companies = []
    addresses = []
    postal_codes = []
    cities = []
    countries = []
    first_names = []
    last_names = []

    for _, row in df.iterrows():
        # Look up the same column that is read: the original guard tested
        # '地址' (an output column) while reading '地址栏'.
        line = row.get('地址栏', "")
        customer_name = row.get('客户名称', "")

        name_company, address, postal_code, city, country = parse_address_line(line)
        names_companies.append(name_company)
        addresses.append(address)
        postal_codes.append(postal_code)
        cities.append(city)
        countries.append(country)

        first_name, last_name = split_customer_name(customer_name)
        first_names.append(first_name)
        last_names.append(last_name)

    df['姓名和公司名'] = names_companies
    df['地址'] = addresses
    df['邮编'] = postal_codes
    df['城市'] = cities
    df['国家'] = countries
    df['名'] = first_names
    df['姓'] = last_names
    return df


def save_as_text_workbook(df, path):
    """Write *df* (header row, no index) to a new workbook saved at *path*.

    Every cell gets the '@' (text) number format. Because the format is
    applied to all columns, no separate pass over column C is needed.
    """
    wb = Workbook()
    ws = wb.active
    for r in dataframe_to_rows(df, index=False, header=True):
        ws.append(r)
    for col in ws.columns:
        for cell in col:
            cell.number_format = '@'
    wb.save(path)


# --- Script flow (runs at module level, like the original flat script) -----
if not os.path.exists(input_file_path):
    print("文件不存在,请检查路径。")
else:
    try:
        df = pd.read_excel(input_file_path, sheet_name='Sheet1')
    except Exception as e:
        print("读取文件时出错:", e)
        # Same effect as the original `exit()`, without relying on the
        # `site`-provided helper.
        raise SystemExit

    add_parsed_columns(df)

    # NOTE(review): a brand-new workbook is saved over the input path, so the
    # saved file contains only the sheet written here — confirm that this
    # overwrite is intended.
    try:
        save_as_text_workbook(df, input_file_path)
        print("数据处理完成,结果已保存到:", input_file_path)
    except Exception as e:
        print("写入文件时出错:", e)