# --- Script 1: export images embedded in an .xlsx workbook (openpyxl) ---
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# vim:fenc=utf-8
# @author tlwlmy
# @version 2026-01-31
import os
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
import re
def _anchor_row(img):
    """Return the 1-based worksheet row of an image's top-left corner, or None."""
    try:
        anchor = getattr(img, 'anchor', None)
        if anchor is None:
            return None
        marker = getattr(anchor, '_from', None)
        if marker is not None:
            # The anchor marker row is shifted by one to obtain the
            # worksheet row used in cell references such as "B3".
            return marker.row + 1
        if hasattr(img, 'left') and hasattr(img, 'top'):
            # Rough estimate from the pixel offset (20 px per row assumed).
            return int(img.top // 20) + 1
    except (AttributeError, TypeError, ValueError):
        pass
    return None


def _unique_png_name(folder, stem):
    """Return ``<stem>.png``, or ``<stem>_<n>.png`` if that file already exists."""
    filename = f"{stem}.png"
    counter = 1
    while os.path.exists(os.path.join(folder, filename)):
        filename = f"{stem}_{counter}.png"
        counter += 1
    return filename


def export_images_by_column_openpyxl(excel_path,
                                     name_column='A',
                                     output_folder="named_images",
                                     sheet_name=None):
    """Export the images embedded in a workbook, naming them after cell values.

    Each image is matched to the worksheet row of its top-left anchor; the
    value stored in ``name_column`` on that row becomes the file name.
    Images whose row or name cannot be determined are saved as
    ``图片<N>.png`` where N is the running image number.

    Args:
        excel_path: Path of the workbook, opened with ``load_workbook``.
        name_column: Column letter (e.g. ``'A'``) that holds the names.
        output_folder: Directory receiving the files; created when missing.
        sheet_name: Worksheet to process; a falsy value processes every sheet.

    Returns:
        The number of images written successfully.
    """
    os.makedirs(output_folder, exist_ok=True)
    wb = load_workbook(excel_path)
    sheets = [wb[sheet_name]] if sheet_name else wb.worksheets

    image_count = 0
    success_count = 0
    for ws in sheets:
        print(f"\n正在处理工作表: {ws.title}")
        # NOTE(review): ``_images`` and ``_data()`` are underscore-prefixed
        # openpyxl members; confirm they exist in the installed version.
        for img in ws._images:
            image_count += 1
            stem = f"图片{image_count}"

            # Only the row matters for naming. The previous check also
            # required a truthy column index, which rejected every image
            # anchored in the first column (index 0).
            row_idx = _anchor_row(img)
            if row_idx is None:
                print(f" 图片{image_count}: 无法获取位置信息,使用默认名称")
            else:
                try:
                    name_value = ws[f"{name_column}{row_idx}"].value
                except Exception as e:
                    # Best effort: an unreadable name cell must not abort the export.
                    print(f" 图片{image_count}: 读取命名单元格失败 ({e}),使用默认名称")
                    name_value = None
                # Compare against None/"" so that values such as 0 still name the file.
                if name_value is not None and name_value != "":
                    stem = clean_filename(str(name_value))

            # Collision check covers default names too, so nothing is overwritten.
            filename = _unique_png_name(output_folder, stem)
            filepath = os.path.join(output_folder, filename)
            try:
                # NOTE(review): the bytes are written as-is; the .png suffix
                # is applied regardless of the embedded image format.
                with open(filepath, "wb") as f:
                    f.write(img._data())
                success_count += 1
                print(f" ✓ 已保存: {filename}")
            except Exception as e:
                print(f" ✗ 保存失败: {filename}, 错误: {e}")

    print(f"\n{'='*50}")
    print("处理完成!")
    print(f"总图片数: {image_count}")
    print(f"成功导出: {success_count}")
    print(f"保存位置: {os.path.abspath(output_folder)}")
    return success_count
def clean_filename(filename):
    """Turn arbitrary cell text into a safe file-name stem.

    Removes characters that are illegal in file names (``<>:"/\\|?*``) and
    ASCII control characters (newlines and tabs from multi-line cells), trims
    surrounding whitespace and limits the result to 100 characters.

    Args:
        filename: Text to sanitise; non-string values are converted with ``str``.

    Returns:
        The sanitised name, or ``"未命名"`` when nothing usable remains.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', str(filename))
    # Trim before truncating so leading blanks do not eat into the limit,
    # and once more afterwards in case the cut landed on whitespace.
    cleaned = cleaned.strip()[:100].strip()
    return cleaned or "未命名"
# Usage example
if __name__ == "__main__":
    excel_file = "kefumei.xlsx"
    # Export the images of every worksheet, naming each file after the
    # value found in column B on the image's row.
    export_images_by_column_openpyxl(
        excel_file,
        name_column='B',
        output_folder="exported_images",
        sheet_name=None,
    )
# --- Script 2: export images from a legacy .xls workbook (xlrd + olefile) ---
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# vim:fenc=utf-8
# @author tlwlmy
# @version 2026-01-31
import os
import olefile
import xlrd
from PIL import Image
import io
import re
def _column_ref_to_index(name_column):
    """Convert column letters ('A', 'AB', ...) to a 0-based index; pass ints through."""
    if not isinstance(name_column, str):
        return name_column
    letters = name_column.strip().upper()
    if not letters or not all('A' <= ch <= 'Z' for ch in letters):
        raise ValueError(f"invalid column reference: {name_column!r}")
    index = 0
    for ch in letters:
        index = index * 26 + (ord(ch) - ord('A') + 1)
    return index - 1


def export_images_from_xls_by_column(excel_path,
                                     name_column='A',
                                     output_folder="xls_named_images",
                                     sheet_index=0):
    """Export picture streams of an .xls file, named after a worksheet column.

    The naming column is read with xlrd; the OLE container is then scanned
    with olefile for streams whose names suggest picture content. The i-th
    matching stream is named after the i-th row of the naming column, so the
    pairing is purely positional.

    Args:
        excel_path: Path of the ``.xls`` file.
        name_column: Column letters (``'A'``, ``'AB'``) or a 0-based index.
        output_folder: Directory receiving the files; created when missing.
        sheet_index: Index of the worksheet holding the names.

    Returns:
        True when at least one file was written. False otherwise, including
        for a non-``.xls`` path and for any error, which is printed rather
        than raised.
    """
    # Validate before touching the file system so a rejected path leaves no
    # empty output folder behind.
    if not str(excel_path).lower().endswith('.xls'):
        print("错误:仅支持.xls格式文件")
        return False
    os.makedirs(output_folder, exist_ok=True)

    ole = None
    try:
        # Step 1: read the naming column with xlrd.
        print("正在读取Excel数据...")
        xlrd_book = xlrd.open_workbook(excel_path)
        sheet = xlrd_book.sheet_by_index(sheet_index)
        # xlrd.colname() maps index -> letters, the opposite of what is
        # needed here, so the conversion is done locally.
        col_index = _column_ref_to_index(name_column)
        name_values = []
        for row_idx in range(sheet.nrows):
            cell_value = sheet.cell_value(row_idx, col_index)
            name_values.append(str(cell_value) if cell_value else f"行{row_idx+1}")
        print(f"找到 {len(name_values)} 个命名数据")

        # Step 2: open the OLE container once; it is closed in ``finally``.
        ole = olefile.OleFileIO(excel_path)
        stream_paths = ole.listdir()
        for stream_path in stream_paths:
            if ole.get_type(stream_path) == olefile.STGTY_STREAM:
                size = ole.get_size(stream_path)
                print(f"流: {stream_path}, 大小: {size} 字节")

        print("正在解析图片数据...")
        # Stream-name fragments treated as likely picture containers.
        image_patterns = [
            'MBD',
            'Pictures',
            '\x01Ole',
            'CONTENTS',
        ]
        # Informational scan only: embedded images are counted, not exported.
        embedded = []
        for stream_path in stream_paths:
            stream_name = '/'.join(stream_path)
            if not any(pattern in stream_name for pattern in image_patterns):
                continue
            print(f"检查流: {stream_name}")
            try:
                data = ole.openstream(stream_path).read()
                embedded.extend(extract_images_from_binary(data))
            except Exception as e:
                print(f"读取 {stream_name} 时出错: {e}")
        # Report a count; printing the list would dump raw image bytes.
        print(f"在候选流中识别到 {len(embedded)} 个内嵌图片")

        # Select streams whose names hint at picture content.
        image_streams = []
        for stream_path in stream_paths:
            top_level = stream_path[0].lower()
            if any(keyword in top_level for keyword in ('mso', 'picture', 'image')):
                image_streams.append(stream_path)
            elif len(stream_path) > 1 and 'pict' in stream_path[1].lower():
                image_streams.append(stream_path)
        print(f"找到 {len(image_streams)} 个图片流")

        # Step 3: extract and save.
        image_count = 0
        saved_count = 0
        for i, stream_path in enumerate(image_streams):
            try:
                stream_data = ole.openstream(stream_path).read()
                if len(stream_data) < 100:  # too small to be a picture
                    continue
                image_count += 1
                if i < len(name_values):
                    raw_name = name_values[i]
                else:
                    # More picture streams than data rows.
                    raw_name = f"图片_{image_count}"
                clean_name = clean_filename(raw_name)

                filename = f"{clean_name}.png"
                filepath = os.path.join(output_folder, filename)
                try:
                    image = Image.open(io.BytesIO(stream_data))
                    image.save(filepath, 'PNG')
                except Exception:
                    # PIL could not decode or convert the stream; keep the
                    # original fallback of dumping the raw bytes.
                    # NOTE(review): the .jpg suffix is applied without
                    # checking the actual format of the data.
                    filename = f"{clean_name}.jpg"
                    filepath = os.path.join(output_folder, filename)
                    try:
                        with open(filepath, 'wb') as f:
                            f.write(stream_data)
                    except OSError as e:
                        print(f" 图片{i+1}保存失败: {e}")
                        continue
                saved_count += 1
                print(f" ✓ [{saved_count}] 已保存: {filename}")
            except Exception as e:
                print(f" 处理流 {stream_path} 时出错: {e}")
                continue

        print(f"\n{'='*50}")
        print("处理完成!")
        print(f"发现图片流: {len(image_streams)}")
        print(f"成功导出: {saved_count}")
        print(f"保存位置: {os.path.abspath(output_folder)}")
        return saved_count > 0
    except Exception as e:
        print(f"处理文件时发生错误: {e}")
        return False
    finally:
        if ole is not None:
            ole.close()
def clean_filename(filename):
    """Turn arbitrary text into a safe file-name stem.

    Removes characters that are illegal in file names (``<>:"/\\|?*``) and
    ASCII control characters, trims surrounding whitespace and limits the
    result to 80 characters.

    Args:
        filename: Value to sanitise; converted with ``str`` first.

    Returns:
        The sanitised name, or ``"未命名"`` when nothing usable remains.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', str(filename))
    # Strip again after truncating: the cut may land right after a space.
    cleaned = cleaned.strip()[:80].strip()
    return cleaned or "未命名"
def is_image_data(data):
    """Check whether binary data starts with a known image signature.

    Args:
        data: Bytes to inspect.

    Returns:
        A ``(is_image, format_name)`` tuple on every path: ``(True, 'PNG')``
        and similar for a recognised signature, ``(False, None)`` otherwise,
        including for input shorter than 12 bytes. Callers unpack the result,
        so the short-input case must be a tuple too.
    """
    if len(data) < 12:  # too short to hold any of the headers checked below
        return False, None

    signatures = (
        (b'\xFF\xD8\xFF', 'JPEG'),
        (b'\x89PNG\r\n\x1a\n', 'PNG'),
        (b'BM', 'BMP'),
        (b'GIF87a', 'GIF'),
        (b'GIF89a', 'GIF'),
        (b'\x00\x00\x01\x00', 'ICO'),
        (b'\x00\x00\x02\x00', 'CUR'),
    )
    for signature, format_name in signatures:
        if data.startswith(signature):
            return True, format_name

    # RIFF is a generic container (WAV, AVI, ...); it is WebP only when the
    # form type at offset 8 says so.
    if data.startswith(b'RIFF') and data[8:12] == b'WEBP':
        return True, 'WEBP'

    # TIFF exists in both byte orders.
    if data.startswith((b'II\x2A\x00', b'MM\x00\x2A')):
        return True, 'TIFF'
    return False, None
def extract_images_from_binary(data):
    """Carve JPEG, PNG, BMP and GIF images out of an arbitrary byte string.

    The data is scanned left to right. At each step the earliest remaining
    signature is taken, its end is located with the matching ``find_*_end``
    helper, and scanning resumes after the image. A signature whose end
    cannot be located is treated as a false positive and skipped.

    Args:
        data: Bytes to scan.

    Returns:
        A list of dicts in order of appearance with the keys ``format``
        (file extension), ``data`` (image bytes), ``start``, ``end``
        (offsets into ``data``, end exclusive) and ``size``.
    """
    formats = (
        (b'\xFF\xD8\xFF', 'jpg', find_jpeg_end),
        (b'\x89PNG\r\n\x1a\n', 'png', find_png_end),
        (b'BM', 'bmp', find_bmp_end),
        (b'GIF87a', 'gif', find_gif_end),
        (b'GIF89a', 'gif', find_gif_end),
    )
    images = []
    # Next known offset of every signature (-1 = none left). Caching them
    # avoids rescanning the whole tail for each candidate.
    hits = [data.find(signature) for signature, _, _ in formats]
    position = 0
    while True:
        for k, (signature, _, _) in enumerate(formats):
            if hits[k] != -1 and hits[k] < position:
                hits[k] = data.find(signature, position)
        candidates = [(hit, k) for k, hit in enumerate(hits) if hit != -1]
        if not candidates:
            break
        start, k = min(candidates)
        _, ext, find_end = formats[k]
        end = find_end(data, start)
        # The end finders return ``start + 1`` when no end was located.
        if end > start + 1:
            image_data = data[start:end]
            images.append({
                'format': ext,
                'data': image_data,
                'start': start,
                'end': end,
                'size': len(image_data)
            })
            position = end
        else:
            position = start + 1
    return images


def find_jpeg_end(data, start):
    """Return the offset just past the JPEG that begins at ``start``.

    Walks the marker segments instead of searching for the first ``FF D9``,
    so an end marker inside a skipped segment (for example an embedded
    thumbnail) does not truncate the image.

    Returns:
        The exclusive end offset, or ``start + 1`` when no end was found.
    """
    size = len(data)
    pos = start + 2  # skip SOI (FF D8)
    while pos + 1 < size:
        if data[pos] != 0xFF:
            # Inside entropy-coded data: jump to the next marker candidate.
            pos = data.find(b'\xFF', pos)
            if pos == -1:
                break
            continue
        marker = data[pos + 1]
        if marker == 0xD9:  # EOI
            return pos + 2
        if marker == 0xFF:  # fill byte before a marker
            pos += 1
        elif marker in (0x00, 0x01, 0xD8) or 0xD0 <= marker <= 0xD7:
            # Stuffed byte, TEM, SOI or RSTn: no length field follows.
            pos += 2
        else:
            if pos + 4 > size:
                break
            segment_length = int.from_bytes(data[pos + 2:pos + 4], 'big')
            if segment_length < 2:  # malformed: length includes its own 2 bytes
                break
            pos += 2 + segment_length
    return start + 1


def find_png_end(data, start):
    """Return the offset just past the PNG that begins at ``start``.

    Searches for the IEND chunk type followed by its fixed CRC.

    Returns:
        The exclusive end offset, or ``start + 1`` when no IEND was found.
    """
    marker_at = data.find(b'IEND\xAE\x42\x60\x82', start)
    if marker_at == -1:
        return start + 1
    return marker_at + 8  # 4 bytes chunk type + 4 bytes CRC


def find_bmp_end(data, start):
    """Return the offset just past the BMP that begins at ``start``.

    Uses the file size stored in the 14-byte BMP file header. Because "BM"
    is a very weak signature, the header must also look plausible (zero
    reserved fields, sane pixel-data offset, size within the buffer).

    Returns:
        The exclusive end offset, or ``start + 1`` when the header is invalid.
    """
    header = data[start:start + 14]
    if len(header) == 14 and header[6:10] == b'\x00\x00\x00\x00':
        file_size = int.from_bytes(header[2:6], 'little')
        pixel_offset = int.from_bytes(header[10:14], 'little')
        # 26 = file header (14) + smallest DIB header (12).
        if 26 <= pixel_offset <= file_size and start + file_size <= len(data):
            return start + file_size
    return start + 1


def find_gif_end(data, start):
    """Return the offset just past the GIF that begins at ``start``.

    Walks the block structure (extensions and image descriptors with their
    sub-blocks) up to the trailer byte ``0x3B``.

    Returns:
        The exclusive end offset, or ``start + 1`` when the structure is
        truncated or invalid.
    """
    size = len(data)
    pos = start + 6  # signature + version
    if pos + 7 > size:
        return start + 1
    screen_flags = data[pos + 4]
    pos += 7  # logical screen descriptor
    if screen_flags & 0x80:  # global colour table present
        pos += 3 * (2 << (screen_flags & 0x07))
    while pos < size:
        introducer = data[pos]
        if introducer == 0x3B:  # trailer
            return pos + 1
        if introducer == 0x21:  # extension: introducer + label
            pos += 2
        elif introducer == 0x2C:  # image descriptor
            if pos + 10 > size:
                break
            image_flags = data[pos + 9]
            pos += 10
            if image_flags & 0x80:  # local colour table present
                pos += 3 * (2 << (image_flags & 0x07))
            pos += 1  # LZW minimum code size
        else:
            break
        # Skip length-prefixed sub-blocks up to the zero-length terminator.
        while pos < size and data[pos] != 0:
            pos += 1 + data[pos]
        if pos >= size:
            break
        pos += 1
    return start + 1
def extract_and_save_images(file_path, output_dir='extracted_images'):
    """Extract every image found in the streams of an OLE file.

    A stream that starts with an image signature is saved whole; any other
    stream is searched for embedded images, which are saved individually.
    Streams of 100 bytes or fewer are ignored.

    Args:
        file_path: Path of the OLE compound file (for example an ``.xls``).
        output_dir: Directory receiving the files; created when missing.

    Returns:
        The list of file paths written; empty when ``file_path`` is not an
        OLE file.
    """
    if not olefile.isOleFile(file_path):
        print("不是 OLE 文件")
        return []
    os.makedirs(output_dir, exist_ok=True)

    saved_images = []
    ole = olefile.OleFileIO(file_path)
    try:
        print(f"分析文件: {file_path}")
        print("目录结构:")
        ole.dumpdirectory()  # prints the detailed directory structure

        for i, stream_path in enumerate(ole.listdir()):
            stream_name = '/'.join(stream_path)
            if ole.get_type(stream_path) != olefile.STGTY_STREAM:
                continue
            size = ole.get_size(stream_path)
            print(f"流: {stream_name}, 大小: {size} 字节")
            try:
                data = ole.openstream(stream_path).read()
                if len(data) <= 100:  # ignore tiny streams
                    continue
                is_image, img_type = is_image_data(data)
                if is_image:
                    ext = img_type.lower() if img_type else 'bin'
                    # OLE stream names may contain control characters
                    # (e.g. "\x01Ole"), which are unsafe in file names.
                    safe_name = re.sub(r'[\x00-\x1f<>:"/\\|?*]', '_', stream_name)
                    filename = f"image_{i}_{safe_name}.{ext}"
                    filepath = os.path.join(output_dir, filename)
                    with open(filepath, 'wb') as f:
                        f.write(data)
                    saved_images.append(filepath)
                    print(f"✓ 保存图片: {filename} ({len(data)} 字节)")
                else:
                    # Not an image by itself: look for images inside it.
                    for j, img_info in enumerate(extract_images_from_binary(data)):
                        filename = f"extracted_{i}_{j}.{img_info['format']}"
                        filepath = os.path.join(output_dir, filename)
                        with open(filepath, 'wb') as f:
                            f.write(img_info['data'])
                        saved_images.append(filepath)
                        print(f"✓ 提取图片: {filename} ({img_info['size']} 字节)")
            except Exception as e:
                # Best effort: one unreadable stream must not stop the rest.
                print(f"处理流 {stream_name} 时出错: {e}")
                continue
    finally:
        # Release the file handle even when listing or dumping fails.
        ole.close()

    print(f"\n总共提取了 {len(saved_images)} 张图片到目录: {output_dir}")
    return saved_images
# Usage example
if __name__ == "__main__":
    excel_file = "kefumei.xls"  # the .xls workbook to scan

    # Extract the images stored in the workbook's OLE streams.
    images = extract_and_save_images(excel_file, 'excel_images')

    # Disabled alternative: name the exported files after a worksheet column.
    # ``name_column=2`` is a 0-based index, i.e. the third column (C).
    # export_images_from_xls_by_column(
    #     excel_path=excel_file,
    #     name_column=2,
    #     output_folder="kefumei_image",
    #     sheet_index=0,  # first worksheet
    # )