使用python实现从PDF格式的control mapping获取gross die数量

需求:根据 fab 提供的如下 PDF 格式的 control mapping,计算单片晶圆上的芯片(gross die)数量。

实现逻辑思路

✅ 正确解析 X/cell 和 Y/cell

✅ 字段命名清晰:Reticle_Die_X(= X/cell)、Reticle_Die_Y(= Y/cell)

✅ 自动提取 Chip Size、Offset、Notch Angle、Ring Edge、Photo Die Number

✅ 动态计算 Die 数量,支持任意 Reticle 布局

✅ 输出 CSV 到所选文件夹,带 ✅/❌ 匹配结果

具体python代码实现:

python 复制代码
import csv
import re
import numpy as np
import pdfplumber
from pathlib import Path
import tkinter as tk
from tkinter import filedialog

# Hide the Tk root window so that only the folder dialog is shown.
root = tk.Tk()
root.withdraw()

# ==============================
# Ask the user for the input folder
# ==============================
print("📂 请选择包含 CONTROL MAPPING PDF 的文件夹...")
input_folder = filedialog.askdirectory(title="选择 PDF 所在文件夹")
if not input_folder:
    # A falsy result means no folder was chosen in the dialog.
    print("❌ 未选择文件夹,程序退出。")
    # `exit()` is a helper injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds;
    # raising SystemExit directly always works and exits with status 0.
    raise SystemExit

# The result CSV is written next to the PDFs that were processed.
INPUT_FOLDER = Path(input_folder)
OUTPUT_CSV = INPUT_FOLDER / "die_count_results.csv"

# ==============================
# Default fallback values (used when a field is missing from the PDF)
# ==============================
# The notch angles are compared against an angle in degrees (0-360) and
# ring_edge is subtracted from the wafer radius in calculate_die_count().
DEFAULTS = {
    'reticle_die_x': 26,   # Reticle Columns → X direction
    'reticle_die_y': 27,   # Reticle Rows    → Y direction
    'notch_min_angle': 263.83,
    'notch_max_angle': 275.81,
    'ring_edge': 3.0
}


# ==============================
# 推断晶圆尺寸(基于 Alignment Mark 坐标)
# ==============================
def infer_wafer_size_from_alignment_marks(alignment_marks):
    """Guess the wafer diameter from the alignment-mark coordinates.

    Args:
        alignment_marks: Iterable of ``(x, y)`` coordinate pairs.

    Returns:
        150 when the largest absolute coordinate is below 70, 200 when it
        is below 120, otherwise 300.
    """
    # Largest |x| or |y| over all marks decides the size class.
    farthest = max(max(abs(mark_x), abs(mark_y)) for mark_x, mark_y in alignment_marks)

    # (exclusive upper bound on the coordinate, wafer diameter)
    size_classes = ((70, 150), (120, 200))
    for upper_bound, diameter in size_classes:
        if farthest < upper_bound:
            return diameter
    return 300


# ==============================
# 从 PDF 提取所有关键参数
# ==============================
def extract_parameters_from_pdf(pdf_path):
    """Read a CONTROL MAPPING PDF and return the die-count parameters.

    The text of every page is concatenated, the keyword fields are parsed
    from it, and DEFAULTS (or 0.0 for the offsets) fills in whatever
    optional field was not found.

    Args:
        pdf_path: Path of the PDF; passed to ``pdfplumber.open``.

    Returns:
        dict with ``chip_size`` (an ``(x, y)`` tuple of floats),
        ``offset_x``, ``offset_y``, ``reticle_die_x``, ``reticle_die_y``,
        ``notch_min_angle``, ``notch_max_angle`` and ``ring_edge``.
        ``photo_die_number`` and ``alignment_marks`` are present only when
        they were found in the PDF.

    Raises:
        ValueError: ``"Parse failed: ..."`` for any failure while reading
            or parsing, including a missing Chip Size. The original
            exception is chained as ``__cause__``.
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        # Pages without extractable text are skipped.
        full_text = "".join(f"{text}\n" for text in page_texts if text)

        parameters = _parse_control_mapping_text(full_text)

        # Chip Size is the only mandatory field.
        if 'chip_size' not in parameters:
            raise ValueError("Chip Size not found")

        parameters.setdefault('offset_x', 0.0)
        parameters.setdefault('offset_y', 0.0)
        for key in ('reticle_die_x', 'reticle_die_y',
                    'notch_min_angle', 'notch_max_angle', 'ring_edge'):
            parameters.setdefault(key, DEFAULTS[key])

        return parameters

    except Exception as e:
        # Callers report str(e); chaining keeps the real traceback available.
        raise ValueError(f"Parse failed: {e}") from e


def _search_xy(line):
    """Return the ``(x, y)`` floats written as ``X=..`` / ``Y:..`` on *line*, or None."""
    x_match = re.search(r'[Xx][=:]\s*([+-]?\d*\.?\d+)', line)
    y_match = re.search(r'[Yy][=:]\s*([+-]?\d*\.?\d+)', line)
    if x_match and y_match:
        return float(x_match.group(1)), float(y_match.group(1))
    return None


def _parse_control_mapping_text(full_text):
    """Parse the fields that are present in *full_text* into a dict (no defaults applied)."""
    parameters = {}

    for raw_line in full_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # The first keyword found on a line decides how that line is parsed.
        if "Chip Size" in line:
            xy = _search_xy(line)
            if xy is not None:
                parameters['chip_size'] = xy

        elif "Offset Value" in line:
            xy = _search_xy(line)
            if xy is not None:
                parameters['offset_x'], parameters['offset_y'] = xy

        elif "Photo Die Number" in line and ":" in line:
            # Only the text between the first and second colon is searched.
            number = re.search(r'\d+', line.split(":")[1])
            if number:
                parameters['photo_die_number'] = int(number.group())

        elif "Ring Edge" in line and ":" in line:
            number = re.search(r'[\d.]+', line.split(":")[1])
            if number:
                try:
                    parameters['ring_edge'] = float(number.group())
                except ValueError:
                    # "[\d.]+" can match text such as "." or "1.2.3";
                    # leave the field unset so the default applies.
                    pass

        elif "Notch" in line and ("Angle" in line or "°" in line):
            # The first two numbers on the line are taken as the angle range.
            angles = re.findall(r'\d*\.?\d+', line)
            if len(angles) >= 2:
                low, high = sorted(float(angle) for angle in angles[:2])
                parameters['notch_min_angle'] = low
                parameters['notch_max_angle'] = high

        elif "Alignment Mark" in line:
            # The first four numbers on the line are read as x1, y1, x2, y2.
            coords = re.findall(r'[+-]?\d*\.?\d+', line)
            if len(coords) >= 4:
                x1, y1, x2, y2 = map(float, coords[:4])
                parameters['alignment_marks'] = [(x1, y1), (x2, y2)]

    # Dies per reticle: "Reticle Columns" or "X/cell" for X, "Reticle Rows"
    # or "Y/cell" for Y, searched in the whole text. Each pattern already
    # contains the "/cell" alternative, so no separate fallback search is
    # needed.
    reticle_patterns = {
        'reticle_die_x': r'(?:[Rr]eticle\s+[Cc]olumns?|X[/\\]cell)\s*[:=]\s*(\d+)',
        'reticle_die_y': r'(?:[Rr]eticle\s+[Rr]ows?|Y[/\\]cell)\s*[:=]\s*(\d+)',
    }
    for key, pattern in reticle_patterns.items():
        found = re.search(pattern, full_text, re.IGNORECASE)
        if found:
            parameters[key] = int(found.group(1))

    return parameters


# ==============================
# 动态计算有效 Die 数量
# ==============================
def calculate_die_count(parameters, wafer_radius):
    """Count the dies whose centre lies in the usable area of the wafer.

    Reticles are tiled on a grid anchored at ``(offset_x, offset_y)`` and
    every die centre in every reticle is tested. A die is counted when its
    centre is within ``wafer_radius - ring_edge`` of the origin and its
    polar angle (degrees, 0-360) is outside the closed range
    ``[notch_min_angle, notch_max_angle]``.

    Args:
        parameters: dict with ``chip_size`` (width, height), ``offset_x``,
            ``offset_y``, ``ring_edge``, ``reticle_die_x``,
            ``reticle_die_y``, ``notch_min_angle`` and ``notch_max_angle``.
        wafer_radius: Wafer radius, in the same unit as the chip size.

    Returns:
        int: number of dies that pass both tests.

    Raises:
        ValueError: if the chip size or the reticle die counts are not
            positive (the reticle pitch would be zero or negative).
    """
    chip_w, chip_h = parameters['chip_size']
    offset_x = parameters['offset_x']
    offset_y = parameters['offset_y']
    ring_edge = parameters['ring_edge']
    reticle_die_x = parameters['reticle_die_x']   # dies per reticle along X
    reticle_die_y = parameters['reticle_die_y']   # dies per reticle along Y
    notch_min = parameters['notch_min_angle']
    notch_max = parameters['notch_max_angle']

    # Fail with a clear message instead of a ZeroDivisionError (or a
    # meaningless count) further down.
    if chip_w <= 0 or chip_h <= 0:
        raise ValueError(f"Chip size must be positive, got ({chip_w}, {chip_h})")
    if reticle_die_x <= 0 or reticle_die_y <= 0:
        raise ValueError(
            f"Reticle die counts must be positive, got ({reticle_die_x}, {reticle_die_y})"
        )

    effective_radius = wafer_radius - ring_edge
    reticle_w = reticle_die_x * chip_w
    reticle_h = reticle_die_y * chip_h

    # Generous reticle index range; reticles outside the circle are skipped below.
    max_reticles_x = int((wafer_radius + abs(offset_x)) / reticle_w) + 5
    max_reticles_y = int((wafer_radius + abs(offset_y)) / reticle_h) + 5

    # Die-centre offsets relative to a reticle's lower-left corner are the
    # same for every reticle, so compute them once.
    die_dx = [(di + 0.5) * chip_w for di in range(reticle_die_x)]
    die_dy = [(dj + 0.5) * chip_h for dj in range(reticle_die_y)]

    total_count = 0

    for i in range(-max_reticles_x, max_reticles_x + 1):
        reticle_x0 = offset_x + i * reticle_w
        # Skip reticle columns whose bounding box is entirely outside the circle.
        if reticle_x0 + reticle_w < -effective_radius or reticle_x0 > effective_radius:
            continue

        for j in range(-max_reticles_y, max_reticles_y + 1):
            reticle_y0 = offset_y + j * reticle_h
            if reticle_y0 + reticle_h < -effective_radius or reticle_y0 > effective_radius:
                continue

            for dx in die_dx:
                die_x = reticle_x0 + dx
                for dy in die_dy:
                    die_y = reticle_y0 + dy

                    # Die centre must be inside the usable radius.
                    if np.sqrt(die_x**2 + die_y**2) > effective_radius:
                        continue

                    # Polar angle of the die centre, mapped to 0-360 degrees.
                    theta = np.degrees(np.arctan2(die_y, die_x))
                    if theta < 0:
                        theta += 360
                    # Dies inside the notch angle range are excluded.
                    if notch_min <= theta <= notch_max:
                        continue

                    total_count += 1

    return total_count


# ==============================
# 主程序
# ==============================
def main():
    """Process every PDF in INPUT_FOLDER and write the results to OUTPUT_CSV.

    For each ``*.pdf`` file the parameters are extracted, the wafer size is
    inferred from the alignment marks (200 when none were found), the die
    count is estimated and compared with the PDF's Photo Die Number. A file
    that raises is recorded as an ERROR row instead of stopping the batch.
    """
    if not INPUT_FOLDER.exists():
        print(f"❌ 文件夹不存在: {INPUT_FOLDER}")
        return

    pdf_files = sorted(INPUT_FOLDER.glob("*.pdf"))
    if not pdf_files:
        print(f"⚠️ 在 '{INPUT_FOLDER}' 中未找到 PDF 文件")
        return

    print(f"\n🔍 找到 {len(pdf_files)} 个 PDF 文件,开始处理...\n")
    results = []

    for pdf_path in pdf_files:
        try:
            print(f"📄 处理中: {pdf_path.name}")
            params = extract_parameters_from_pdf(pdf_path)

            # Wafer size: inferred from the alignment marks, otherwise 200.
            wafer_diameter = (
                infer_wafer_size_from_alignment_marks(params['alignment_marks'])
                if 'alignment_marks' in params
                else 200
            )

            estimated = calculate_die_count(params, wafer_diameter / 2.0)
            expected = params.get('photo_die_number', 'N/A')
            if estimated == expected:
                matched = "✅"
            else:
                matched = "❌"

            chip_x, chip_y = params['chip_size']
            row = {
                "File": pdf_path.name,
                "Wafer_Size_mm": wafer_diameter,
                "Chip_Size_X": chip_x,
                "Chip_Size_Y": chip_y,
                "Offset_X": params['offset_x'],
                "Offset_Y": params['offset_y'],
                "Reticle_Die_X": params['reticle_die_x'],
                "Reticle_Die_Y": params['reticle_die_y'],
                "Notch_Min_Angle": params['notch_min_angle'],
                "Notch_Max_Angle": params['notch_max_angle'],
                "Ring_Edge": params['ring_edge'],
                "Estimated_Die_Count": estimated,
                "PDF_Photo_Die_Number": expected,
                "Match": matched,
            }
            results.append(row)
            print(f"   → 晶圆: {wafer_diameter}mm | 估算: {estimated} | PDF值: {expected} {matched}\n")

        except Exception as e:
            # Keep the batch going; the failure becomes a row in the CSV.
            error_msg = str(e)
            print(f"   ❌ 错误: {error_msg}\n")
            results.append({
                "File": pdf_path.name,
                "Wafer_Size_mm": "ERROR",
                "Error": error_msg,
            })

    columns = [
        "File", "Wafer_Size_mm",
        "Chip_Size_X", "Chip_Size_Y",
        "Offset_X", "Offset_Y",
        "Reticle_Die_X", "Reticle_Die_Y",
        "Notch_Min_Angle", "Notch_Max_Angle", "Ring_Edge",
        "Estimated_Die_Count", "PDF_Photo_Die_Number", "Match",
    ]
    # The Error column is added only when at least one file failed.
    if any("Error" in entry for entry in results):
        columns.append("Error")

    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        # DictWriter's default restval ("") fills the columns a row lacks,
        # e.g. the parameter columns of an ERROR row.
        writer.writerows(results)

    print(f"✅ 完成!结果已保存至:\n{OUTPUT_CSV}")


# Call main() only when this file is executed as a script.
if __name__ == "__main__":
    main()

代码运行后打开保存的csv文件显示如下:

| File | Reticle_Die_X | Reticle_Die_Y | Estimated | PDF Value | Match |
| --- | --- | --- | --- | --- | --- |
| a1tmqi66.pdf | 25 | 40 | 61154 | 61154 | ✅ |
| a1tmqz88.pdf | 26 | 27 | 42512 | 42512 | ✅ |
相关推荐
火车叼位32 分钟前
脚本伪装:让 Python 与 Node.js 像原生 Shell 命令一样运行
运维·javascript·python
孤狼warrior43 分钟前
YOLO目标检测 一千字解析yolo最初的摸样 模型下载,数据集构建及模型训练代码
人工智能·python·深度学习·算法·yolo·目标检测·目标跟踪
Katecat996631 小时前
YOLO11分割算法实现甲状腺超声病灶自动检测与定位_DWR方法应用
python
玩大数据的龙威1 小时前
农经权二轮延包—各种地块示意图
python·arcgis
ZH15455891311 小时前
Flutter for OpenHarmony Python学习助手实战:数据库操作与管理的实现
python·学习·flutter
belldeep2 小时前
python:用 Flask 3 , mistune 2 和 mermaid.min.js 10.9 来实现 Markdown 中 mermaid 图表的渲染
javascript·python·flask
喵手2 小时前
Python爬虫实战:电商价格监控系统 - 从定时任务到历史趋势分析的完整实战(附CSV导出 + SQLite持久化存储)!
爬虫·python·爬虫实战·零基础python爬虫教学·电商价格监控系统·从定时任务到历史趋势分析·采集结果sqlite存储
喵手2 小时前
Python爬虫实战:京东/淘宝搜索多页爬虫实战 - 从反爬对抗到数据入库的完整工程化方案(附CSV导出 + SQLite持久化存储)!
爬虫·python·爬虫实战·零基础python爬虫教学·京东淘宝页面数据采集·反爬对抗到数据入库·采集结果csv导出
B站_计算机毕业设计之家2 小时前
猫眼电影数据可视化与智能分析平台 | Python Flask框架 Echarts 推荐算法 爬虫 大数据 毕业设计源码
python·机器学习·信息可视化·flask·毕业设计·echarts·推荐算法
PPPPPaPeR.2 小时前
光学算法实战:深度解析镜片厚度对前后表面折射/反射的影响(纯Python实现)
开发语言·python·数码相机·算法