需求:根据 fab 提供的 PDF 格式 Control Mapping 文件(如下),计算一片晶圆上的芯片(Die)数量

实现逻辑思路
✅ 正确解析 X/cell 和 Y/cell
✅ 字段命名清晰:Reticle_Die_X(= X/cell)、Reticle_Die_Y(= Y/cell)
✅ 自动提取 Chip Size、Offset、Notch Angle、Ring Edge、Photo Die Number
✅ 动态计算 Die 数量,支持任意 Reticle 布局
✅ 输出 CSV 到所选文件夹,带 ✅/❌ 匹配结果
具体python代码实现:
python
import csv
import re
import numpy as np
import pdfplumber
from pathlib import Path
import tkinter as tk
from tkinter import filedialog
# Hide the Tk root window so that only the folder-picker dialog is shown.
root = tk.Tk()
root.withdraw()

# ==============================
# Ask the user for the input folder
# ==============================
print("📂 请选择包含 CONTROL MAPPING PDF 的文件夹...")
input_folder = filedialog.askdirectory(title="选择 PDF 所在文件夹")
if not input_folder:
    print("❌ 未选择文件夹,程序退出。")
    # ``exit()`` is injected by the ``site`` module and can be missing
    # (``python -S``, frozen executables); raising SystemExit always works
    # and yields the same zero exit status.
    raise SystemExit

# Folder that is scanned for PDFs, and the CSV report written next to them.
INPUT_FOLDER = Path(input_folder)
OUTPUT_CSV = INPUT_FOLDER / "die_count_results.csv"
# ==============================
# 默认回退值(PDF 中缺失时使用)
# ==============================
# Fallback values used for any of these fields that the PDF does not provide.
DEFAULTS = dict(
    reticle_die_x=26,  # "Reticle Columns": dies per reticle in the X direction
    reticle_die_y=27,  # "Reticle Rows": dies per reticle in the Y direction
    notch_min_angle=263.83,
    notch_max_angle=275.81,
    ring_edge=3.0,
)
# ==============================
# 推断晶圆尺寸(基于 Alignment Mark 坐标)
# ==============================
def infer_wafer_size_from_alignment_marks(alignment_marks):
    """Guess the wafer diameter from alignment-mark coordinates.

    The largest absolute X or Y coordinate found among the marks is compared
    against two fixed thresholds (70 and 120).

    Args:
        alignment_marks: Iterable of ``(x, y)`` numeric pairs.

    Returns:
        ``150`` when the largest coordinate is below 70, ``200`` when it is
        below 120, otherwise ``300``.

    Raises:
        ValueError: If ``alignment_marks`` contains no pairs.
    """
    extents = [max(abs(x), abs(y)) for x, y in alignment_marks]
    if not extents:
        # Fail with a message that names the argument instead of the opaque
        # "max() arg is an empty sequence".
        raise ValueError("alignment_marks must contain at least one (x, y) pair")

    max_coord = max(extents)
    if max_coord < 70:
        return 150
    if max_coord < 120:
        return 200
    return 300
# ==============================
# 从 PDF 提取所有关键参数
# ==============================
def _read_pdf_text(pdf_path):
    """Return the extracted text of every page, each followed by a newline."""
    with pdfplumber.open(pdf_path) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        # Pages for which extract_text() returns nothing are skipped.
        return "".join(text + "\n" for text in page_texts if text)


def _parse_xy_pair(line):
    """Return ``(x, y)`` floats from the ``X=..``/``Y=..`` tokens, else ``None``."""
    x_match = re.search(r'[Xx][=:]\s*([+-]?\d*\.?\d+)', line)
    y_match = re.search(r'[Yy][=:]\s*([+-]?\d*\.?\d+)', line)
    if x_match and y_match:
        return float(x_match.group(1)), float(y_match.group(1))
    return None


def _parse_line_fields(line, parameters):
    """Store in *parameters* the single-line field carried by *line*, if any."""
    # The branches are mutually exclusive: the first keyword found on the line
    # decides how the line is interpreted, even when its value fails to parse.
    if "Chip Size" in line:
        pair = _parse_xy_pair(line)
        if pair is not None:
            parameters['chip_size'] = pair
    elif "Offset Value" in line:
        pair = _parse_xy_pair(line)
        if pair is not None:
            parameters['offset_x'], parameters['offset_y'] = pair
    elif "Photo Die Number" in line and ":" in line:
        digits = re.search(r'\d+', line.split(":")[1])
        if digits:
            parameters['photo_die_number'] = int(digits.group())
    elif "Ring Edge" in line and ":" in line:
        # Match one well-formed decimal so that a stray "." next to the value
        # (e.g. a sentence-ending period) cannot make float() fail.
        number = re.search(r'\d+(?:\.\d*)?|\.\d+', line.split(":")[1])
        if number:
            parameters['ring_edge'] = float(number.group())
    elif "Notch" in line and ("Angle" in line or "°" in line):
        # The first two unsigned numbers on the line are taken as the angles.
        angles = re.findall(r'\d*\.?\d+', line)
        if len(angles) >= 2:
            low, high = sorted([float(angles[0]), float(angles[1])])
            parameters['notch_min_angle'] = low
            parameters['notch_max_angle'] = high
    elif "Alignment Mark" in line:
        # The first four signed numbers are read as x1, y1, x2, y2.
        coords = re.findall(r'[+-]?\d*\.?\d+', line)
        if len(coords) >= 4:
            x1, y1, x2, y2 = map(float, coords[:4])
            parameters['alignment_marks'] = [(x1, y1), (x2, y2)]


def _find_reticle_count(full_text, label_pattern):
    """Return the integer following ``<label> :`` or ``<label> =``, else ``None``."""
    match = re.search(
        rf'(?:{label_pattern})\s*[:=]\s*(\d+)',
        full_text,
        re.IGNORECASE,
    )
    return int(match.group(1)) if match else None


def extract_parameters_from_pdf(pdf_path):
    """Extract the die-count parameters from a control-mapping PDF.

    The text of all pages is read with pdfplumber and scanned line by line for
    "Chip Size", "Offset Value", "Photo Die Number", "Ring Edge", "Notch ...
    Angle" and "Alignment Mark". The number of dies per reticle is searched in
    the whole text under "Reticle Columns" / "X/cell" and "Reticle Rows" /
    "Y/cell". When a field occurs several times, the last parsed value wins.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        dict with the keys ``chip_size`` (``(x, y)`` floats), ``offset_x``,
        ``offset_y``, ``reticle_die_x``, ``reticle_die_y``,
        ``notch_min_angle``, ``notch_max_angle`` and ``ring_edge``; plus
        ``photo_die_number`` (int) and ``alignment_marks`` (two ``(x, y)``
        tuples) when they were found. Missing offsets default to ``0.0``; the
        other missing values are taken from ``DEFAULTS``.

    Raises:
        ValueError: With the message ``"Parse failed: <reason>"`` when the PDF
            cannot be read or "Chip Size" is not found.
    """
    try:
        full_text = _read_pdf_text(pdf_path)

        parameters = {}
        for raw_line in full_text.splitlines():
            line = raw_line.strip()
            if line:
                _parse_line_fields(line, parameters)

        # Dies per reticle along X and Y, searched across the whole document.
        reticle_labels = (
            ('reticle_die_x', r'reticle\s+columns?|X[/\\]cell'),
            ('reticle_die_y', r'reticle\s+rows?|Y[/\\]cell'),
        )
        for key, label_pattern in reticle_labels:
            count = _find_reticle_count(full_text, label_pattern)
            if count is not None:
                parameters[key] = count

        # Chip Size is the only mandatory field.
        if 'chip_size' not in parameters:
            raise ValueError("Chip Size not found")

        parameters.setdefault('offset_x', 0.0)
        parameters.setdefault('offset_y', 0.0)
        for key in ('reticle_die_x', 'reticle_die_y',
                    'notch_min_angle', 'notch_max_angle', 'ring_edge'):
            parameters.setdefault(key, DEFAULTS[key])
        return parameters
    except Exception as e:
        # Every failure is reported as ValueError so callers need to handle a
        # single exception type; the original cause stays attached.
        raise ValueError(f"Parse failed: {e}") from e
# ==============================
# 动态计算有效 Die 数量
# ==============================
def calculate_die_count(parameters, wafer_radius):
    """Count the dies whose centres lie in the usable area of the wafer.

    Reticles are tiled on a regular grid starting at the offset. A die is
    counted when the distance of its centre from the origin does not exceed
    ``wafer_radius - ring_edge`` and the polar angle of its centre (degrees,
    normalised to ``[0, 360)``) is outside ``[notch_min_angle,
    notch_max_angle]``.

    Args:
        parameters: Mapping with the keys ``chip_size`` (``(width, height)``),
            ``offset_x``, ``offset_y``, ``ring_edge``, ``reticle_die_x``,
            ``reticle_die_y``, ``notch_min_angle`` and ``notch_max_angle``.
        wafer_radius: Wafer radius, in the same length unit as the chip size.

    Returns:
        The number of counted dies as an ``int``.

    Raises:
        ValueError: If a chip dimension or a reticle die count is not
            positive (the reticle pitch would be zero or negative).
    """
    chip_w, chip_h = parameters['chip_size']
    offset_x = parameters['offset_x']
    offset_y = parameters['offset_y']
    ring_edge = parameters['ring_edge']
    reticle_die_x = parameters['reticle_die_x']  # dies per reticle along X
    reticle_die_y = parameters['reticle_die_y']  # dies per reticle along Y
    notch_min = parameters['notch_min_angle']
    notch_max = parameters['notch_max_angle']

    if chip_w <= 0 or chip_h <= 0:
        raise ValueError(f"chip_size must be positive, got {(chip_w, chip_h)}")
    if reticle_die_x <= 0 or reticle_die_y <= 0:
        raise ValueError(
            "reticle die counts must be positive, "
            f"got {(reticle_die_x, reticle_die_y)}"
        )

    effective_radius = wafer_radius - ring_edge
    reticle_w = reticle_die_x * chip_w
    reticle_h = reticle_die_y * chip_h

    # Number of reticle steps scanned on each side of the origin; the "+ 5"
    # is a safety margin so that the whole wafer is covered.
    max_reticles_x = int((wafer_radius + abs(offset_x)) / reticle_w) + 5
    max_reticles_y = int((wafer_radius + abs(offset_y)) / reticle_h) + 5

    total_count = 0
    for i in range(-max_reticles_x, max_reticles_x + 1):
        reticle_x0 = offset_x + i * reticle_w
        # Skip reticle columns lying entirely outside the bounding square.
        if reticle_x0 + reticle_w < -effective_radius or reticle_x0 > effective_radius:
            continue
        for j in range(-max_reticles_y, max_reticles_y + 1):
            reticle_y0 = offset_y + j * reticle_h
            # Same quick rejection along Y.
            if reticle_y0 + reticle_h < -effective_radius or reticle_y0 > effective_radius:
                continue
            for di in range(reticle_die_x):
                die_x = reticle_x0 + (di + 0.5) * chip_w  # die centre, X
                for dj in range(reticle_die_y):
                    die_y = reticle_y0 + (dj + 0.5) * chip_h  # die centre, Y
                    r = np.sqrt(die_x**2 + die_y**2)
                    if r > effective_radius:
                        continue
                    theta = np.degrees(np.arctan2(die_y, die_x))
                    if theta < 0:
                        theta += 360
                    # Dies inside the notch angular window are not counted.
                    if notch_min <= theta <= notch_max:
                        continue
                    total_count += 1
    return total_count
# ==============================
# 主程序
# ==============================
def _analyse_pdf(pdf_path):
    """Parse one PDF, estimate its die count and return its CSV row."""
    params = extract_parameters_from_pdf(pdf_path)

    # Wafer size comes from the alignment marks, or 200 mm when there are none.
    if 'alignment_marks' in params:
        wafer_diameter = infer_wafer_size_from_alignment_marks(params['alignment_marks'])
    else:
        wafer_diameter = 200

    estimated = calculate_die_count(params, wafer_diameter / 2.0)
    # A missing Photo Die Number ('N/A') never equals the estimate, so such a
    # file is reported as a mismatch.
    expected = params.get('photo_die_number', 'N/A')
    matched = "✅" if estimated == expected else "❌"

    return {
        "File": pdf_path.name,
        "Wafer_Size_mm": wafer_diameter,
        "Chip_Size_X": params['chip_size'][0],
        "Chip_Size_Y": params['chip_size'][1],
        "Offset_X": params['offset_x'],
        "Offset_Y": params['offset_y'],
        "Reticle_Die_X": params['reticle_die_x'],
        "Reticle_Die_Y": params['reticle_die_y'],
        "Notch_Min_Angle": params['notch_min_angle'],
        "Notch_Max_Angle": params['notch_max_angle'],
        "Ring_Edge": params['ring_edge'],
        "Estimated_Die_Count": estimated,
        "PDF_Photo_Die_Number": expected,
        "Match": matched,
    }


def _write_results_csv(results, csv_path):
    """Write the result rows to *csv_path*, adding an Error column if needed."""
    fieldnames = [
        "File", "Wafer_Size_mm", "Chip_Size_X", "Chip_Size_Y",
        "Offset_X", "Offset_Y", "Reticle_Die_X", "Reticle_Die_Y",
        "Notch_Min_Angle", "Notch_Max_Angle", "Ring_Edge",
        "Estimated_Die_Count", "PDF_Photo_Die_Number", "Match",
    ]
    if any("Error" in row for row in results):
        fieldnames.append("Error")

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        # restval fills the columns a row lacks (error rows only carry three
        # keys), so the rows do not have to be padded by hand.
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(results)


def main():
    """Process every PDF in ``INPUT_FOLDER`` and write the CSV report.

    Each PDF is parsed, its die count is estimated and compared with the
    Photo Die Number read from the PDF. A file that fails is recorded as an
    error row and the batch continues. The report is written to
    ``OUTPUT_CSV``. Returns ``None``; progress is printed to stdout.
    """
    if not INPUT_FOLDER.exists():
        print(f"❌ 文件夹不存在: {INPUT_FOLDER}")
        return

    # Compare the suffix case-insensitively so that "*.PDF" is also found on
    # case-sensitive filesystems.
    pdf_files = sorted(
        path for path in INPUT_FOLDER.glob("*")
        if path.suffix.lower() == ".pdf"
    )
    if not pdf_files:
        print(f"⚠️ 在 '{INPUT_FOLDER}' 中未找到 PDF 文件")
        return

    results = []
    print(f"\n🔍 找到 {len(pdf_files)} 个 PDF 文件,开始处理...\n")
    for pdf_path in pdf_files:
        print(f"📄 处理中: {pdf_path.name}")
        try:
            row = _analyse_pdf(pdf_path)
        except Exception as e:
            # Deliberate per-file boundary: one bad PDF must not abort the
            # batch, so the failure is recorded in the report instead.
            error_msg = str(e)
            print(f" ❌ 错误: {error_msg}\n")
            results.append({
                "File": pdf_path.name,
                "Wafer_Size_mm": "ERROR",
                "Error": error_msg,
            })
        else:
            # Kept outside the try block so that a failure while printing
            # cannot add a second row for a file already recorded.
            results.append(row)
            print(
                f" → 晶圆: {row['Wafer_Size_mm']}mm"
                f" | 估算: {row['Estimated_Die_Count']}"
                f" | PDF值: {row['PDF_Photo_Die_Number']} {row['Match']}\n"
            )

    _write_results_csv(results, OUTPUT_CSV)
    print(f"✅ 完成!结果已保存至:\n{OUTPUT_CSV}")
# Run the batch only when the file is executed as a script.
if __name__ == "__main__":
    main()
代码运行后打开保存的csv文件显示如下:
| File | Reticle_Die_X | Reticle_Die_Y | Estimated | PDF Value | Match |
|---|---|---|---|---|---|
| a1tmqi66.pdf | 25 | 40 | 61154 | 61154 | ✅ |
| a1tmqz88.pdf | 26 | 27 | 42512 | 42512 | ✅ |