使用 Python 从 PDF 格式的 control mapping 中获取 gross die 数量

需求:根据 fab 给出的如下 PDF 格式的 control mapping,获取一片晶圆上的芯片(gross die)数量

实现逻辑思路

✅ 正确解析 X/cell 和 Y/cell

✅ 字段命名清晰:Reticle_Die_X(= X/cell)、Reticle_Die_Y(= Y/cell)

✅ 自动提取 Chip Size、Offset、Notch Angle、Ring Edge、Photo Die Number

✅ 动态计算 Die 数量,支持任意 Reticle 布局

✅ 输出 CSV 到所选文件夹,带 ✅/❌ 匹配结果

具体python代码实现:

python 复制代码
import csv
import math
import re
import tkinter as tk
from pathlib import Path
from tkinter import filedialog

import numpy as np
import pdfplumber

# Create the Tk root that the folder dialog needs and hide it, so that only the
# dialog itself is shown.
root = tk.Tk()
root.withdraw()

# ==============================
# Ask the user for the input folder
# ==============================
print("📂 请选择包含 CONTROL MAPPING PDF 的文件夹...")
input_folder = filedialog.askdirectory(title="选择 PDF 所在文件夹")
if not input_folder:
    # A falsy result means no folder was chosen; there is nothing to process.
    print("❌ 未选择文件夹,程序退出。")
    # ``exit()`` is a helper injected by the ``site`` module for interactive
    # sessions and is not available under ``python -S`` or in frozen
    # executables. Raising SystemExit ends the script with the same (zero)
    # status without depending on it.
    raise SystemExit

# Folder scanned for PDFs, and the CSV report written next to them.
INPUT_FOLDER = Path(input_folder)
OUTPUT_CSV = INPUT_FOLDER / "die_count_results.csv"

# ==============================
# Fallback values (used when a field is missing from the PDF)
# ==============================
DEFAULTS = dict(
    reticle_die_x=26,          # Reticle Columns -> X direction
    reticle_die_y=27,          # Reticle Rows    -> Y direction
    notch_min_angle=263.83,
    notch_max_angle=275.81,
    ring_edge=3.0,
)


# ==============================
# Infer the wafer size (from the alignment mark coordinates)
# ==============================
def infer_wafer_size_from_alignment_marks(alignment_marks):
    """Guess the nominal wafer diameter from alignment mark coordinates.

    Args:
        alignment_marks: Non-empty iterable of ``(x, y)`` coordinate pairs.

    Returns:
        150 when the largest absolute coordinate is below 70, 200 when it is
        below 120, otherwise 300. The caller reports this value as a diameter
        in millimetres.

    Raises:
        ValueError: If ``alignment_marks`` is empty.
    """
    # Largest |x| or |y| of each mark, then the overall largest.
    extents = [max(abs(mark_x), abs(mark_y)) for mark_x, mark_y in alignment_marks]
    farthest = max(extents)

    # (exclusive upper bound on the coordinate, wafer diameter)
    for upper_bound, diameter in ((70, 150), (120, 200)):
        if farthest < upper_bound:
            return diameter
    return 300


# ==============================
# Extract all key parameters from a PDF
# ==============================
def _parse_xy_pair(line):
    """Return ``(x, y)`` floats from "X=<num> ... Y=<num>" text, or None if either is absent."""
    x_match = re.search(r'[Xx][=:]\s*([+-]?\d*\.?\d+)', line)
    y_match = re.search(r'[Yy][=:]\s*([+-]?\d*\.?\d+)', line)
    if x_match and y_match:
        return float(x_match.group(1)), float(y_match.group(1))
    return None


def _parse_value_after_colon(line, pattern, convert):
    """Convert the first ``pattern`` match found after the first colon of ``line``.

    Only the text between the first and the second colon is searched. Returns
    None when nothing matches or the matched text cannot be converted.
    """
    match = re.search(pattern, line.split(":")[1])
    if match is None:
        return None
    try:
        return convert(match.group())
    except ValueError:
        # e.g. "[\d.]+" matched a lone "." — treat the field as missing.
        return None


def _floats_or_none(tokens):
    """Convert every token to float; return None if any token is not a number."""
    try:
        return [float(token) for token in tokens]
    except ValueError:
        return None


def _find_labelled_count(full_text, label_pattern):
    """Return the integer following ``<label> :`` / ``<label> =`` in the text, or None."""
    match = re.search(
        rf'(?:{label_pattern})\s*[:=]\s*(\d+)',
        full_text,
        re.IGNORECASE,
    )
    return int(match.group(1)) if match else None


def extract_parameters_from_pdf(pdf_path):
    """Read a control-mapping PDF and collect the parameters used for die counting.

    The text of every page is extracted with pdfplumber and scanned line by
    line; each line is tested against the field labels in a fixed order and
    handled by the first one it contains. Later occurrences of a field
    overwrite earlier ones.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        dict with these keys always present:
            ``chip_size`` (``(x, y)`` tuple of floats), ``offset_x``,
            ``offset_y`` (default 0.0), ``reticle_die_x``, ``reticle_die_y``,
            ``notch_min_angle``, ``notch_max_angle``, ``ring_edge`` (the last
            five fall back to ``DEFAULTS``);
        and these only when found in the text:
            ``photo_die_number`` (int), ``alignment_marks`` (list of two
            ``(x, y)`` tuples).

    Raises:
        ValueError: With the message ``"Parse failed: <reason>"`` for any
            failure, including a missing "Chip Size" line. The original
            exception is attached as ``__cause__``.
    """
    parameters = {}
    try:
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        # Pages without extractable text are skipped.
        full_text = "".join(text + "\n" for text in page_texts if text)

        for raw_line in full_text.splitlines():
            line = raw_line.strip()
            if not line:
                continue

            # --- Chip Size ---
            if "Chip Size" in line:
                pair = _parse_xy_pair(line)
                if pair is not None:
                    parameters['chip_size'] = pair

            # --- Offset Value ---
            elif "Offset Value" in line:
                pair = _parse_xy_pair(line)
                if pair is not None:
                    parameters['offset_x'], parameters['offset_y'] = pair

            # --- Photo Die Number ---
            elif "Photo Die Number" in line and ":" in line:
                value = _parse_value_after_colon(line, r'\d+', int)
                if value is not None:
                    parameters['photo_die_number'] = value

            # --- Ring Edge ---
            elif "Ring Edge" in line and ":" in line:
                value = _parse_value_after_colon(line, r'[\d.]+', float)
                if value is not None:
                    parameters['ring_edge'] = value

            # --- Notch Angle (several layouts are accepted) ---
            elif "Notch" in line and ("Angle" in line or "°" in line):
                # The first two unsigned numbers on the line are the angle range.
                angles = _floats_or_none(re.findall(r'\d*\.?\d+', line)[:2])
                if angles is not None and len(angles) == 2:
                    low, high = sorted(angles)
                    parameters['notch_min_angle'] = low
                    parameters['notch_max_angle'] = high

            # --- Alignment Mark ---
            elif "Alignment Mark" in line:
                # The first four signed numbers are read as x1, y1, x2, y2.
                coords = _floats_or_none(re.findall(r'[+-]?\d*\.?\d+', line)[:4])
                if coords is not None and len(coords) == 4:
                    x1, y1, x2, y2 = coords
                    parameters['alignment_marks'] = [(x1, y1), (x2, y2)]

        # Reticle die counts are searched in the whole text rather than per
        # line, so the label and its value may be separated by a line break.
        # X count comes from "Reticle Columns" or "X/cell".
        reticle_x = _find_labelled_count(full_text, r'[Rr]eticle\s+[Cc]olumns?|X[/\\]cell')
        if reticle_x is not None:
            parameters['reticle_die_x'] = reticle_x

        # Y count comes from "Reticle Rows" or "Y/cell".
        reticle_y = _find_labelled_count(full_text, r'[Rr]eticle\s+[Rr]ows?|Y[/\\]cell')
        if reticle_y is not None:
            parameters['reticle_die_y'] = reticle_y

        # Required field check
        if 'chip_size' not in parameters:
            raise ValueError("Chip Size not found")

        # Fill in defaults for everything that was not found
        parameters.setdefault('offset_x', 0.0)
        parameters.setdefault('offset_y', 0.0)
        for key in ('reticle_die_x', 'reticle_die_y',
                    'notch_min_angle', 'notch_max_angle', 'ring_edge'):
            parameters.setdefault(key, DEFAULTS[key])

        return parameters

    except Exception as e:
        # Single error type for callers; keep the original as the cause.
        raise ValueError(f"Parse failed: {e}") from e


# ==============================
# Compute the number of valid dies
# ==============================
def calculate_die_count(parameters, wafer_radius):
    """Count the dies whose centre lies on the usable area of the wafer.

    Reticle shots of ``reticle_die_x`` x ``reticle_die_y`` dies are tiled over
    the plane starting at ``(offset_x, offset_y)``. A die is counted when its
    centre is within ``wafer_radius - ring_edge`` of the origin and the polar
    angle of its centre (degrees, 0 <= angle < 360) is outside the closed
    range ``[notch_min_angle, notch_max_angle]``.

    NOTE(review): the notch test excludes the whole angular wedge from the
    origin outwards, not only an area near the wafer edge — confirm this is
    the intended exclusion rule.

    Args:
        parameters: Mapping with the keys ``chip_size`` (``(width, height)``),
            ``offset_x``, ``offset_y``, ``ring_edge``, ``reticle_die_x``,
            ``reticle_die_y``, ``notch_min_angle`` and ``notch_max_angle``.
        wafer_radius: Wafer radius, in the same unit as the chip size.

    Returns:
        int: Number of counted dies.

    Raises:
        ValueError: If the chip size or a reticle die count is zero, which
            would make the reticle pitch zero.
    """
    chip_w, chip_h = parameters['chip_size']
    offset_x = parameters['offset_x']
    offset_y = parameters['offset_y']
    ring_edge = parameters['ring_edge']
    reticle_die_x = parameters['reticle_die_x']   # dies per reticle along X (columns)
    reticle_die_y = parameters['reticle_die_y']   # dies per reticle along Y (rows)
    notch_min = parameters['notch_min_angle']
    notch_max = parameters['notch_max_angle']

    effective_radius = wafer_radius - ring_edge
    reticle_w = reticle_die_x * chip_w
    reticle_h = reticle_die_y * chip_h

    # A zero pitch cannot tile the wafer and would otherwise surface as a bare
    # ZeroDivisionError below.
    if reticle_w == 0 or reticle_h == 0:
        raise ValueError(
            "Chip size and reticle die counts must be non-zero "
            f"(chip_size={chip_w}x{chip_h}, "
            f"reticle={reticle_die_x}x{reticle_die_y})"
        )

    # Enough reticles in each direction to cover the wafer despite the offset,
    # plus a safety margin of 5.
    max_reticles_x = int((wafer_radius + abs(offset_x)) / reticle_w) + 5
    max_reticles_y = int((wafer_radius + abs(offset_y)) / reticle_h) + 5

    total_count = 0

    for i in range(-max_reticles_x, max_reticles_x + 1):
        reticle_x0 = offset_x + i * reticle_w
        # Quick reject: this column of reticles is entirely left or right of
        # the bounding square of the usable disc.
        if reticle_x0 + reticle_w < -effective_radius or reticle_x0 > effective_radius:
            continue

        for j in range(-max_reticles_y, max_reticles_y + 1):
            reticle_y0 = offset_y + j * reticle_h
            # Quick reject: reticle entirely below or above the bounding square.
            if reticle_y0 + reticle_h < -effective_radius or reticle_y0 > effective_radius:
                continue

            for di in range(reticle_die_x):
                die_x = reticle_x0 + (di + 0.5) * chip_w   # die centre, X
                for dj in range(reticle_die_y):
                    die_y = reticle_y0 + (dj + 0.5) * chip_h   # die centre, Y

                    if math.hypot(die_x, die_y) > effective_radius:
                        continue

                    # Polar angle of the die centre, normalised to [0, 360).
                    theta = math.degrees(math.atan2(die_y, die_x))
                    if theta < 0:
                        theta += 360
                    if notch_min <= theta <= notch_max:
                        continue

                    total_count += 1

    return total_count


# ==============================
# Main program
# ==============================
def main():
    """Process every ``*.pdf`` in ``INPUT_FOLDER`` and write a CSV summary.

    For each PDF (in sorted order) the parameters are extracted, the wafer
    diameter is inferred from the alignment marks (200 when none were found),
    the die count is estimated and compared with the PDF's photo die number.
    A file that raises is recorded as an error row instead of stopping the
    batch. The report is written to ``OUTPUT_CSV``; an ``Error`` column is
    added only when at least one file failed.
    """
    if not INPUT_FOLDER.exists():
        print(f"❌ 文件夹不存在: {INPUT_FOLDER}")
        return

    pdf_files = sorted(INPUT_FOLDER.glob("*.pdf"))
    if not pdf_files:
        print(f"⚠️ 在 '{INPUT_FOLDER}' 中未找到 PDF 文件")
        return

    print(f"\n🔍 找到 {len(pdf_files)} 个 PDF 文件,开始处理...\n")

    results = []
    for pdf_path in pdf_files:
        try:
            print(f"📄 处理中: {pdf_path.name}")
            params = extract_parameters_from_pdf(pdf_path)

            # Infer the wafer size; fall back to 200 without alignment marks.
            wafer_diameter = (
                infer_wafer_size_from_alignment_marks(params['alignment_marks'])
                if 'alignment_marks' in params
                else 200
            )

            # Estimate and compare with the value stated in the PDF, if any.
            estimated = calculate_die_count(params, wafer_diameter / 2.0)
            expected = params.get('photo_die_number', 'N/A')
            if estimated == expected:
                matched = "✅"
            else:
                matched = "❌"

            chip_x, chip_y = params['chip_size']
            row = {
                "File": pdf_path.name,
                "Wafer_Size_mm": wafer_diameter,
                "Chip_Size_X": chip_x,
                "Chip_Size_Y": chip_y,
                "Offset_X": params['offset_x'],
                "Offset_Y": params['offset_y'],
                "Reticle_Die_X": params['reticle_die_x'],
                "Reticle_Die_Y": params['reticle_die_y'],
                "Notch_Min_Angle": params['notch_min_angle'],
                "Notch_Max_Angle": params['notch_max_angle'],
                "Ring_Edge": params['ring_edge'],
                "Estimated_Die_Count": estimated,
                "PDF_Photo_Die_Number": expected,
                "Match": matched,
            }
            results.append(row)
            print(f"   → 晶圆: {wafer_diameter}mm | 估算: {estimated} | PDF值: {expected} {matched}\n")

        except Exception as e:
            error_msg = str(e)
            print(f"   ❌ 错误: {error_msg}\n")
            results.append({
                "File": pdf_path.name,
                "Wafer_Size_mm": "ERROR",
                "Error": error_msg,
            })

    # Column order of the report; "Error" is appended only when needed.
    fieldnames = [
        "File", "Wafer_Size_mm", "Chip_Size_X", "Chip_Size_Y",
        "Offset_X", "Offset_Y", "Reticle_Die_X", "Reticle_Die_Y",
        "Notch_Min_Angle", "Notch_Max_Angle", "Ring_Edge",
        "Estimated_Die_Count", "PDF_Photo_Die_Number", "Match",
    ]
    has_errors = any("Error" in row for row in results)
    if has_errors:
        fieldnames.append("Error")

    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        # restval fills the columns a row lacks (e.g. error rows) with "".
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(results)

    print(f"✅ 完成!结果已保存至:\n{OUTPUT_CSV}")


# Run the batch only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

代码运行后打开保存的csv文件显示如下:

| File | Reticle_Die_X | Reticle_Die_Y | Estimated | PDF Value | Match |
| --- | --- | --- | --- | --- | --- |
| a1tmqi66.pdf | 25 | 40 | 61154 | 61154 | ✅ |
| a1tmqz88.pdf | 26 | 27 | 42512 | 42512 | ✅ |
相关推荐
徐同保18 小时前
上传文件,在前端用 pdf.js 提取 上传的pdf文件中的图片
前端·javascript·pdf
CodeCraft Studio18 小时前
国产化PDF处理控件Spire.PDF教程:使用Python批量自动化将PDF转换为黑白(灰度)
python·pdf·自动化·spire.pdf·文档自动化·pdf开发组件·国产化文档组件
web3.088899918 小时前
1688商品详情API接口深度解析
开发语言·python
幻云201018 小时前
Python机器学习:从入门到资深
人工智能·python
爱吃羊的老虎18 小时前
从零开始安装到精通的 Jupyter Notebook 完整教程
ide·python·jupyter
时兮兮时18 小时前
CALIPSO垂直特征掩膜(VFM)—使用python绘制主类型、气溶胶和云的子类型
笔记·python·calipso
言之。18 小时前
LangChain 模型模块使用详解
python·langchain·flask
屋檐上的大修勾18 小时前
AI算力开放-yolov8适配 mmyolo大疆无人机
开发语言·python
郑州光合科技余经理18 小时前
开发实战:海外版同城o2o生活服务平台核心模块设计
开发语言·git·python·架构·uni-app·生活·智慧城市