图片转excel vlm 提取手写清单信息 Qwen/Qwen3-VL-235B-A22B-Instruct

Qwen/Qwen3-VL-235B-A22B-Instruct 提取图片数据很好用
python 复制代码
# vlm_png_to_excel.py
import json
import os
from PIL import Image
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils import get_column_letter
import tempfile
from flask import Flask, request, jsonify, send_file
from werkzeug.utils import secure_filename
# 添加路径设置
import sys
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# 修改导入语句
from llm_server.llm_server import VLMService

# Flask 应用实例
app = Flask(__name__)

# 创建 VLMService 实例
vlm_service = VLMService()

def parse_vlm_response(response_data):
    """
    解析 VLM 返回的结构化数据
    """
    try:
        # 获取响应内容
        result = response_data.get("choices", [{}])[0].get("message", {}).get("content", "{}")
        
        # 去除可能的代码块标记
        if result.startswith("```json"):
            result = result[7:]  # 去除开头的 ```json
        if result.startswith("```"):
            result = result[3:]  # 去除可能的 ```
        if result.endswith("```"):
            result = result[:-3]  # 去除结尾的 ```
        
        # 去除首尾空白字符
        result = result.strip()
        
        # 解析JSON
        parsed = json.loads(result)
        
        # 只保留 drawing_info 字段
        if "drawing_info" in parsed:
            return {"drawing_info": parsed["drawing_info"]}
        else:
            return {}
    except Exception as e:
        print(f"解析失败: {e}")
        return {}

def save_data_to_excel(data, output_path="output.xlsx", image_filename=None):
    """
    将结构化数据保存为Excel文件
    
    Args:
        data (dict): 结构化数据
        output_path (str): 输出Excel文件路径
        image_filename (str): 原始图像文件名，用于填入Excel前三行合并单元格
    """
    # 创建工作簿和工作表
    wb = Workbook()
    ws = wb.active
    
    # 如果提供了图像文件名，在前三行合并单元格并填入文件名
    if image_filename:
        # 写入文件名到第一行
        ws["A1"] = image_filename
        # 合并前三行
        ws.merge_cells("A1:E3")
        # 设置居中对齐和字体
        ws["A1"].alignment = Alignment(horizontal="center", vertical="center")
        ws["A1"].font = Font(size=14, bold=True)
        
        # 从第4行开始写入数据标题
        start_row = 4
    else:
        start_row = 1
    
    # 准备数据
    rows = []
    
    # 处理图纸基本信息
    if "drawing_info" in data and data["drawing_info"]:
        rows.append(["图纸信息", "", "", "", ""])
        rows.append(["图号", "长度(mm)", "宽度(mm)", "数量", "备注"])
        for item in data["drawing_info"]:
            drawing_number = item.get("drawing_number", "")
            length = item.get("length", "")
            width = item.get("width", "")
            quantity = item.get("quantity", "")
            remark = item.get("remark", "")
            rows.append([drawing_number, length, width, quantity, remark])
    
    # 移除了 dimensions 和 text 的处理部分
    
    # 将数据写入工作表
    for i, row in enumerate(rows, start=start_row):
        for j, value in enumerate(row, start=1):
            ws.cell(row=i, column=j, value=value)
    
    # 自动调整列宽
    for column in ws.columns:
        max_length = 0
        column_letter = get_column_letter(column[0].column)
        for cell in column:
            try:
                if len(str(cell.value)) > max_length:
                    max_length = len(str(cell.value))
            except:
                pass
        adjusted_width = (max_length + 2)
        ws.column_dimensions[column_letter].width = adjusted_width
    
    # 保存文件
    wb.save(output_path)
    print(f"Excel 文件已保存至: {output_path}")
    return output_path

def analyze_image_and_generate_excel(image_path):
    """
    主函数：调用 VLM 分析图像，并生成 Excel 文件
    
    Args:
        image_path (str): 输入图像路径
        output_path (str): 输出Excel文件路径，默认为"dxf_output/data/extracted_data.xlsx"
    """
   
    output_path = r"excel_output/npg2excel/extracted_data.xlsx"
    
    # 确保输出目录存在
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # Step 1: 构造请求消息
    messages = [
        {
            "role": "user",
            "content": '''请提取这张图片的信息，尽可能提取出图号、数量（要算）等信息，图号类似F2N-数字并以 JSON 格式返回。
输出格式要求：
{
    "drawing_info": [
        {
            "drawing_number": "ABC123",
            "length": "1200",
            "width": "800",
            "quantity": "5",
            "remark": "不锈钢板"
        }
    ]
}'''
        }
    ]

    # Step 2: 直接调用 VLMService 而不是通过 HTTP 请求
    try:
        result = vlm_service.create_with_image(messages, image_path)
        print("VLM 返回:", result)
        # 解析响应
        extracted_data = parse_vlm_response(result)
        # 生成 Excel
        save_data_to_excel(extracted_data, output_path, os.path.basename(image_path))
    except Exception as e:
        print("错误:", str(e))

@app.route('/api/analyze-image', methods=['GET'])
def analyze_image_api():
    """
    Flask 接口：通过URL参数指定图片路径，调用VLM分析并生成Excel文件
    
    参数:
    - image_path: 图片路径 (必需)

    """
    try:
        # 从请求参数中获取图片路径
        image_path = request.args.get('image_path')
     
         
        if not image_path:
            return jsonify({"error": "缺少 image_path 参数"}), 400
        
        # 检查输入文件是否存在
        if not os.path.exists(image_path):
            return jsonify({"error": f"图像文件 '{image_path}' 不存在"}), 400
            
        # 创建临时文件保存输出的Excel
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp_excel:
            output_path = tmp_excel.name
        
        # 调用分析函数
        analyze_image_and_generate_excel(image_path)
        
        # 返回生成的Excel文件
        return send_file(
            output_path,
            as_attachment=True,
           
            mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
        
    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route('/routes')
def show_routes():
    routes = []
    for rule in app.url_map.iter_rules():
        # 获取端点函数的docstring
        endpoint_func = app.view_functions.get(rule.endpoint)
        docstring = endpoint_func.__doc__.strip() if endpoint_func and endpoint_func.__doc__ else "无描述"
        
        routes.append({
            'endpoint': rule.endpoint,
            'methods': list(rule.methods),
            'rule': str(rule),
            'description': docstring
        })
    return {'routes': routes}

if __name__ == '__main__':
    # 检查是否以脚本方式直接运行
    if len(sys.argv) == 1:
        # 使用 input() 获取图像路径
        print("请输入图像文件路径（输入 'quit' 退出）:")
        while True:
            image_path = input("图像路径: ").strip()
            
            if image_path.lower() == 'quit':
                print("程序退出")
                break
                
            if not os.path.exists(image_path):
                print(f"错误: 图像文件 '{image_path}' 不存在，请重新输入")
                continue
                
            try:
                analyze_image_and_generate_excel(image_path)
                print("处理完成！")
                break
            except Exception as e:
                print(f"处理过程中出现错误: {e}")
                continue
    else:
        # Flask 服务器模式
        import argparse
        parser = argparse.ArgumentParser()
        parser.add_argument('--port', type=int, default=5001, help='Port to run the server on')
        args = parser.parse_args()
        app.run(host='0.0.0.0', port=args.port, debug=True)