使用 Python 实现从 PDF 格式的 control mapping 获取 gross die 数量

需求:根据 fab 提供的 PDF 格式 control mapping,计算一片晶圆上的芯片(gross die)数量。

实现逻辑思路

✅ 正确解析 X/cell 和 Y/cell

✅ 字段命名清晰:Reticle_Die_X(= X/cell)、Reticle_Die_Y(= Y/cell)

✅ 自动提取 Chip Size、Offset、Notch Angle、Ring Edge、Photo Die Number

✅ 动态计算 Die 数量,支持任意 Reticle 布局

✅ 输出 CSV 到所选文件夹,带 ✅/❌ 匹配结果

具体python代码实现:

python 复制代码
import csv
import re
import numpy as np
import pdfplumber
from pathlib import Path
import tkinter as tk
from tkinter import filedialog

# Hide the Tk root window so that only the folder-picker dialog is shown.
root = tk.Tk()
root.withdraw()

# ==============================
# Ask the user for the input folder
# ==============================
print("📂 请选择包含 CONTROL MAPPING PDF 的文件夹...")
input_folder = filedialog.askdirectory(title="选择 PDF 所在文件夹")
if not input_folder:
    # The dialog returns an empty value when it is cancelled.
    print("❌ 未选择文件夹,程序退出。")
    # `exit()` is a helper injected by the `site` module; it is not defined
    # under `python -S` or in frozen executables. SystemExit is a builtin
    # and terminates with the same exit status (0).
    raise SystemExit

# The result CSV is written into the same folder the PDFs are read from.
INPUT_FOLDER = Path(input_folder)
OUTPUT_CSV = INPUT_FOLDER / "die_count_results.csv"

# ==============================
# Fallback values (used when a field is missing from the PDF)
# ==============================
DEFAULTS = dict(
    reticle_die_x=26,          # dies per reticle along X ("Reticle Columns")
    reticle_die_y=27,          # dies per reticle along Y ("Reticle Rows")
    notch_min_angle=263.83,    # start of the excluded notch sector, degrees
    notch_max_angle=275.81,    # end of the excluded notch sector, degrees
    ring_edge=3.0,             # edge exclusion subtracted from the wafer radius
)


# ==============================
# Infer the wafer size (from the alignment-mark coordinates)
# ==============================
def infer_wafer_size_from_alignment_marks(alignment_marks):
    """Guess the wafer diameter from how far out the alignment marks sit.

    Args:
        alignment_marks: iterable of ``(x, y)`` coordinate pairs.

    Returns:
        150, 200 or 300, chosen by the largest absolute coordinate found in
        any mark: below 70 -> 150, below 120 -> 200, otherwise 300.

    Raises:
        ValueError: if ``alignment_marks`` is empty.
    """
    farthest = max(
        abs(coord) for x, y in alignment_marks for coord in (x, y)
    )

    # (exclusive upper bound on the largest coordinate, wafer diameter)
    size_table = ((70, 150), (120, 200))
    for upper_bound, diameter in size_table:
        if farthest < upper_bound:
            return diameter
    return 300


# ==============================
# Extract all key parameters from a CONTROL MAPPING PDF
# ==============================
def _read_pdf_text(pdf_path):
    """Return the extracted text of every page, each page followed by a newline."""
    chunks = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:  # skip pages for which no text was extracted
                chunks.append(text + "\n")
    return "".join(chunks)


def _find_xy_pair(line):
    """Return ``(x, y)`` parsed from ``X=<num>`` / ``Y=<num>`` in *line*, or None."""
    x_match = re.search(r'[Xx][=:]\s*([+-]?\d*\.?\d+)', line)
    y_match = re.search(r'[Yy][=:]\s*([+-]?\d*\.?\d+)', line)
    if x_match and y_match:
        return float(x_match.group(1)), float(y_match.group(1))
    return None


def _find_reticle_count(full_text, label_pattern):
    """Return the integer following ``<label> :`` or ``<label> =``, or None."""
    match = re.search(
        label_pattern + r'\s*[:=]\s*(\d+)',
        full_text,
        re.IGNORECASE,
    )
    return int(match.group(1)) if match else None


def _parse_line_fields(full_text):
    """Collect the parameters that are recognised one text line at a time."""
    parameters = {}

    for raw_line in full_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # The branches are mutually exclusive: a line is claimed by the first
        # keyword it contains, so their order is significant.

        # --- Chip Size ---
        if "Chip Size" in line:
            pair = _find_xy_pair(line)
            if pair is not None:
                parameters['chip_size'] = pair

        # --- Offset Value ---
        elif "Offset Value" in line:
            pair = _find_xy_pair(line)
            if pair is not None:
                parameters['offset_x'], parameters['offset_y'] = pair

        # --- Photo Die Number ---
        elif "Photo Die Number" in line and ":" in line:
            match = re.search(r'\d+', line.split(":")[1])
            if match:
                parameters['photo_die_number'] = int(match.group())

        # --- Ring Edge ---
        elif "Ring Edge" in line and ":" in line:
            match = re.search(r'[\d.]+', line.split(":")[1])
            if match:
                try:
                    parameters['ring_edge'] = float(match.group())
                except ValueError:
                    # The pattern can also match text such as "." or "1.2.3";
                    # ignore it and leave the value to a later line or the
                    # default.
                    pass

        # --- Notch Angle (several textual formats are accepted) ---
        elif "Notch" in line and ("Angle" in line or "°" in line):
            angles = re.findall(r'\d*\.?\d+', line)
            if len(angles) >= 2:
                # Only the first two numbers on the line are used.
                low, high = sorted((float(angles[0]), float(angles[1])))
                parameters['notch_min_angle'] = low
                parameters['notch_max_angle'] = high

        # --- Alignment Mark ---
        elif "Alignment Mark" in line:
            coords = re.findall(r'[+-]?\d*\.?\d+', line)
            if len(coords) >= 4:
                x1, y1, x2, y2 = map(float, coords[:4])
                parameters['alignment_marks'] = [(x1, y1), (x2, y2)]

    return parameters


def extract_parameters_from_pdf(pdf_path):
    """Read a CONTROL MAPPING PDF and return the die-count parameters.

    Args:
        pdf_path: path of the PDF file; passed to ``pdfplumber.open``.

    Returns:
        dict that always contains ``chip_size`` (tuple of two floats),
        ``offset_x``, ``offset_y``, ``reticle_die_x``, ``reticle_die_y``,
        ``notch_min_angle``, ``notch_max_angle`` and ``ring_edge``.
        ``photo_die_number`` and ``alignment_marks`` are present only when
        they were found in the PDF text. Missing offsets default to 0.0;
        the other optional fields fall back to ``DEFAULTS``.

    Raises:
        ValueError: on any failure (unreadable PDF, no "Chip Size" line, ...).
            The message is ``"Parse failed: <original message>"`` and the
            original exception is attached as ``__cause__``.
    """
    try:
        full_text = _read_pdf_text(pdf_path)
        parameters = _parse_line_fields(full_text)

        # Reticle die counts are searched in the whole text rather than line
        # by line. Each pattern already covers both spellings ("Reticle
        # Columns"/"X/cell", "Reticle Rows"/"Y/cell"), so no second
        # "fallback" search is needed.
        reticle_labels = (
            ('reticle_die_x', r'(?:[Rr]eticle\s+[Cc]olumns?|X[/\\]cell)'),
            ('reticle_die_y', r'(?:[Rr]eticle\s+[Rr]ows?|Y[/\\]cell)'),
        )
        for key, label_pattern in reticle_labels:
            count = _find_reticle_count(full_text, label_pattern)
            if count is not None:
                parameters[key] = count

        # Chip size is the only mandatory field.
        if 'chip_size' not in parameters:
            raise ValueError("Chip Size not found")

        # Fill in whatever the PDF did not provide.
        parameters.setdefault('offset_x', 0.0)
        parameters.setdefault('offset_y', 0.0)
        for key in ('reticle_die_x', 'reticle_die_y',
                    'notch_min_angle', 'notch_max_angle', 'ring_edge'):
            parameters.setdefault(key, DEFAULTS[key])

        return parameters

    except Exception as e:
        # Single failure type for callers; keep the root cause for debugging.
        raise ValueError(f"Parse failed: {str(e)}") from e


# ==============================
# 动态计算有效 Die 数量
# ==============================
def calculate_die_count(parameters, wafer_radius):
    chip_w, chip_h = parameters['chip_size']
    offset_x = parameters['offset_x']
    offset_y = parameters['offset_y']
    ring_edge = parameters['ring_edge']
    reticle_die_x = parameters['reticle_die_x']   # X方向 die 数(原 Columns)
    reticle_die_y = parameters['reticle_die_y']   # Y方向 die 数(原 Rows)
    notch_min = parameters['notch_min_angle']
    notch_max = parameters['notch_max_angle']

    effective_radius = wafer_radius - ring_edge
    reticle_w = reticle_die_x * chip_w
    reticle_h = reticle_die_y * chip_h

    max_reticles_x = int((wafer_radius + abs(offset_x)) / reticle_w) + 5
    max_reticles_y = int((wafer_radius + abs(offset_y)) / reticle_h) + 5

    total_count = 0

    for i in range(-max_reticles_x, max_reticles_x + 1):
        for j in range(-max_reticles_y, max_reticles_y + 1):
            reticle_x0 = offset_x + i * reticle_w
            reticle_y0 = offset_y + j * reticle_h

            # 快速跳过完全在圆外的 Reticle
            if (reticle_x0 + reticle_w < -effective_radius or
                reticle_x0 > effective_radius or
                reticle_y0 + reticle_h < -effective_radius or
                reticle_y0 > effective_radius):
                continue

            for di in range(reticle_die_x):
                for dj in range(reticle_die_y):
                    die_x = reticle_x0 + (di + 0.5) * chip_w
                    die_y = reticle_y0 + (dj + 0.5) * chip_h

                    r = np.sqrt(die_x**2 + die_y**2)
                    if r > effective_radius:
                        continue

                    theta = np.degrees(np.arctan2(die_y, die_x))
                    if theta < 0:
                        theta += 360
                    if notch_min <= theta <= notch_max:
                        continue

                    total_count += 1

    return total_count


# ==============================
# Main program
# ==============================
def main():
    """Process every PDF in ``INPUT_FOLDER`` and write the results to ``OUTPUT_CSV``.

    For each PDF the parameters are extracted, the wafer diameter is inferred
    from the alignment marks (200 when none were found), the die count is
    estimated and compared with the "Photo Die Number" read from the PDF.
    A failure on one file is reported and recorded in an ``Error`` column;
    the remaining files are still processed.
    """
    if not INPUT_FOLDER.exists():
        print(f"❌ 文件夹不存在: {INPUT_FOLDER}")
        return

    # Compare the extension case-insensitively so that "*.PDF" files are also
    # picked up on case-sensitive filesystems.
    pdf_files = sorted(
        path for path in INPUT_FOLDER.glob("*")
        if path.name.lower().endswith(".pdf")
    )
    if not pdf_files:
        print(f"⚠️ 在 '{INPUT_FOLDER}' 中未找到 PDF 文件")
        return

    results = []
    print(f"\n🔍 找到 {len(pdf_files)} 个 PDF 文件,开始处理...\n")

    for pdf_path in pdf_files:
        try:
            print(f"📄 处理中: {pdf_path.name}")
            params = extract_parameters_from_pdf(pdf_path)

            # Infer the wafer size; fall back to 200 without alignment marks.
            if 'alignment_marks' in params:
                wafer_diameter = infer_wafer_size_from_alignment_marks(params['alignment_marks'])
            else:
                wafer_diameter = 200
            wafer_radius = wafer_diameter / 2.0

            estimated = calculate_die_count(params, wafer_radius)
            expected = params.get('photo_die_number', 'N/A')
            if 'photo_die_number' in params:
                matched = "✅" if estimated == expected else "❌"
            else:
                # Without a reference value there is nothing to compare with;
                # do not report that as a mismatch.
                matched = "N/A"

            results.append({
                "File": pdf_path.name,
                "Wafer_Size_mm": wafer_diameter,
                "Chip_Size_X": params['chip_size'][0],
                "Chip_Size_Y": params['chip_size'][1],
                "Offset_X": params['offset_x'],
                "Offset_Y": params['offset_y'],
                "Reticle_Die_X": params['reticle_die_x'],
                "Reticle_Die_Y": params['reticle_die_y'],
                "Notch_Min_Angle": params['notch_min_angle'],
                "Notch_Max_Angle": params['notch_max_angle'],
                "Ring_Edge": params['ring_edge'],
                "Estimated_Die_Count": estimated,
                "PDF_Photo_Die_Number": expected,
                "Match": matched
            })
            print(f"   → 晶圆: {wafer_diameter}mm | 估算: {estimated} | PDF值: {expected} {matched}\n")

        except Exception as e:
            # Per-file boundary: record the failure and continue with the
            # next PDF instead of aborting the whole batch.
            error_msg = str(e)
            print(f"   ❌ 错误: {error_msg}\n")
            results.append({
                "File": pdf_path.name,
                "Wafer_Size_mm": "ERROR",
                "Error": error_msg
            })

    # Write the CSV; the "Error" column is added only when a file failed.
    fieldnames = [
        "File", "Wafer_Size_mm", "Chip_Size_X", "Chip_Size_Y",
        "Offset_X", "Offset_Y", "Reticle_Die_X", "Reticle_Die_Y",
        "Notch_Min_Angle", "Notch_Max_Angle", "Ring_Edge",
        "Estimated_Die_Count", "PDF_Photo_Die_Number", "Match"
    ]
    if any("Error" in r for r in results):
        fieldnames.append("Error")

    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # DictWriter writes its default restval ("") for keys a row lacks,
        # so error rows need no manual padding.
        writer.writerows(results)

    print(f"✅ 完成!结果已保存至:\n{OUTPUT_CSV}")


# Call main() only when this file is executed as a script.
if __name__ == "__main__":
    main()

代码运行后打开保存的csv文件显示如下:

| File | Reticle_Die_X | Reticle_Die_Y | Estimated | PDF Value | Match |
|---|---|---|---|---|---|
| a1tmqi66.pdf | 25 | 40 | 61154 | 61154 | ✅ |
| a1tmqz88.pdf | 26 | 27 | 42512 | 42512 | ✅ |
相关推荐
花酒锄作田9 小时前
使用 pkgutil 实现动态插件系统
python
前端付豪13 小时前
LangChain链 写一篇完美推文?用SequencialChain链接不同的组件
人工智能·python·langchain
曲幽13 小时前
FastAPI实战:打造本地文生图接口,ollama+diffusers让AI绘画更听话
python·fastapi·web·cors·diffusers·lcm·ollama·dreamshaper8·txt2img
老赵全栈实战13 小时前
Pydantic配置管理最佳实践(一)
python
阿尔的代码屋19 小时前
[大模型实战 07] 基于 LlamaIndex ReAct 框架手搓全自动博客监控 Agent
人工智能·python
AI探索者2 天前
LangGraph StateGraph 实战:状态机聊天机器人构建指南
python
AI探索者2 天前
LangGraph 入门:构建带记忆功能的天气查询 Agent
python
FishCoderh2 天前
Python自动化办公实战:批量重命名文件,告别手动操作
python
躺平大鹅2 天前
Python函数入门详解(定义+调用+参数)
python
曲幽2 天前
我用FastAPI接ollama大模型,差点被asyncio整崩溃(附对话窗口实战)
python·fastapi·web·async·httpx·asyncio·ollama