1. 核心建模框架
┌─────────────────────────────────────────┐
│ 事件调度器 (Event Queue) │
│ 全局时间 + 优先级队列管理 │
└─────────────────────────────────────────┘
│
┌───────────────┼───────────────┐
▼ ▼ ▼
┌────────┐ ┌────────┐ ┌──────────┐
│ CPU │◄──►│ Cache │◄──►│ Memory │
│ 模型 │ │ 模型 │ │ 控制器 │
└────────┘ └────────┘ └──────────┘
│ │ │
▼ ▼ ▼
产生事件 产生事件 产生事件
(执行完成) (命中/缺失) (数据返回)
每个硬件模型的核心结构:
python
class HardwareComponent:
    """Base template for an event-driven hardware model.

    Every component is a small state machine: it consumes one event at a
    time and returns the downstream events that this event causes.  The
    global scheduler owns time; components only compute completion times.
    """

    def __init__(self, name, latency_params):
        self.name = name
        self.state = "IDLE"            # current state-machine state
        self.latency = latency_params  # configurable latency parameters

    def process_event(self, event, current_time):
        """Handle one input event; return the list of newly produced events.

        Subclasses typically:
          1. dispatch on current state and event type,
          2. compute a completion time (current_time + processing latency),
          3. possibly update their own state,
          4. emit downstream events.
        """
        produced = []
        return produced
2. 具体硬件建模详解
🔷 CPU / 处理器核心
建模要点:指令流水线、执行单元、分支预测
python
class CPUCore:
    """Event-driven model of a single CPU core.

    Reacts to scheduler events: FETCH issues a read to the instruction
    cache, EXECUTE_COMPLETE routes loads/stores to the data cache (plain
    ALU instructions commit directly), and CACHE_RESPONSE resumes
    execution one cycle after data arrives.
    """

    def __init__(self, core_id=0):
        # Bug fix: `self.id` is used as an event target/source throughout
        # process_event but was never initialised.  Accept it here; the
        # default keeps the old no-argument construction working.
        self.id = core_id
        self.pipeline = []  # pipeline stages (not modelled in detail yet)
        self.rob = []       # reorder buffer
        self.pc = 0         # program counter
        self.stats = {"cycles": 0, "instructions": 0}

    def process_event(self, event, current_time):
        """Handle one scheduler event and return the events it spawns."""
        new_events = []
        if event.type == "FETCH":
            # Fetch: send a read request to the instruction cache.
            new_events.append(Event(
                time=current_time,  # issued immediately
                target="I-Cache",
                action="READ",
                data={"addr": self.pc, "core_id": self.id},
            ))
            self.pc += 4  # assume fixed 4-byte instructions
        elif event.type == "EXECUTE_COMPLETE":
            # Execution finished; check whether a memory access follows.
            instr = event.data["instruction"]
            if instr.is_load():
                # Issue a read request to the data cache.
                new_events.append(Event(
                    time=current_time,
                    target="D-Cache",
                    action="READ",
                    data={"addr": instr.addr, "core_id": self.id},
                ))
            elif instr.is_store():
                new_events.append(Event(
                    time=current_time,
                    target="D-Cache",
                    action="WRITE",
                    data={"addr": instr.addr, "data": instr.data},
                ))
            else:
                # Plain ALU instruction: commit directly.
                new_events.append(Event(
                    time=current_time + 1,  # 1-cycle commit
                    target=self.id,
                    action="COMMIT",
                ))
        elif event.type == "CACHE_RESPONSE":
            # Cache response arrived: resume execution one cycle later.
            new_events.append(Event(
                time=current_time + 1,
                target=self.id,
                action="EXECUTE_RESUME",
                data={"data": event.data["data"]},
            ))
        return new_events
🔷 缓存 (Cache)
建模要点:Tag阵列、Data阵列、MSHR (Miss Status Holding Register)、替换策略
python
class Cache:
    """Event-driven cache model with non-blocking misses (MSHRs).

    READ/WRITE lookups either respond after the hit latency or allocate
    an MSHR and forward the miss to the next level; a FILL from the next
    level installs the line and wakes every waiting request.
    """

    def __init__(self, size, ways, block_size, latency, next_level=None,
                 name="Cache", max_mshrs=8):
        # Bug fixes: `name` (used as the "source" of downstream requests)
        # and `max_mshrs` (read by allocate_mshr) were never initialised.
        # Both get defaults so existing call sites keep working.
        self.name = name
        self.size = size
        self.ways = ways
        self.block_size = block_size
        self.hit_latency = latency    # hit latency (cycles or ns)
        self.next_level = next_level  # next cache level / memory controller
        self.max_mshrs = max_mshrs    # capacity of the MSHR file
        self.tags = {}                # block address -> cache line
        self.mshrs = []               # outstanding miss requests
        self.stats = {"hits": 0, "misses": 0}

    def process_event(self, event, current_time):
        """Serve a READ/WRITE lookup or a FILL from the next level."""
        # NOTE(review): read_data / fill_cache / clear_mshr are assumed to
        # be defined elsewhere in the full model.
        new_events = []
        addr = event.data["addr"]
        block_addr = addr // self.block_size
        if event.type == "READ" or event.type == "WRITE":
            if self.is_hit(block_addr):
                # Hit: respond after the hit latency.
                self.stats["hits"] += 1
                new_events.append(Event(
                    time=current_time + self.hit_latency,
                    target=event.data["core_id"],
                    action="CACHE_RESPONSE",
                    data={"data": self.read_data(block_addr), "hit": True},
                ))
            else:
                # Miss: allocate (or merge into) an MSHR; only a brand-new
                # miss sends a request to the next level.
                self.stats["misses"] += 1
                mshr_id = self.allocate_mshr(addr, event)
                if mshr_id is not None:
                    new_events.append(Event(
                        time=current_time,  # forwarded immediately
                        target=self.next_level,
                        action="READ",
                        data={"addr": addr, "mshr_id": mshr_id,
                              "source": self.name},
                    ))
                # Otherwise the request was merged into an existing MSHR
                # (or the MSHR file is full); no new request is sent.
        elif event.type == "FILL":
            # Data returned from the next level: install the line and wake
            # every request waiting on this MSHR.
            self.fill_cache(event.data["addr"], event.data["data"])
            waiting_requests = self.clear_mshr(event.data["mshr_id"])
            for req in waiting_requests:
                new_events.append(Event(
                    time=current_time + self.hit_latency,
                    target=req.source,
                    action="CACHE_RESPONSE",
                    data={"data": event.data["data"], "hit": False},
                ))
        return new_events

    def is_hit(self, block_addr):
        """Return True when the block is present in the tag store."""
        return block_addr in self.tags

    def allocate_mshr(self, addr, original_event):
        """Track a miss for a non-blocking cache.

        Returns a fresh MSHR id when a new downstream request must be
        sent, or None when nothing should be sent (the request was merged
        into an outstanding miss for the same address, or the MSHR file
        is full and the request must stall).
        """
        # Bug fix: merge secondary misses to the same address, as the
        # caller's comment promises; the original never merged.
        for mshr in self.mshrs:
            if mshr["addr"] == addr:
                mshr["waiting"].append(original_event)
                return None
        if len(self.mshrs) < self.max_mshrs:
            self.mshrs.append({"addr": addr, "waiting": [original_event]})
            return len(self.mshrs) - 1
        # MSHR file full; the cache should block.
        # NOTE(review): the caller currently just drops the request here.
        return None
🔷 内存控制器 / DRAM
建模要点:Bank并行、行缓冲、刷新、命令调度(FR-FCFS)
python
class DRAMController:
    """Event-driven DRAM model: per-bank row buffers and fixed timing.

    Serves READ events and replies with a FILL event scheduled at the
    computed completion time.  Row-buffer hits pay only the column
    access; misses pay precharge + activate + column access.
    """

    def __init__(self, config):
        # One row-buffer state per bank: currently open row (-1 = none)
        # and the time until which the bank is occupied.
        self.banks = [{"row": -1, "busy_until": 0} for _ in range(config.banks)]
        self.row_buffer_hits = 0
        self.row_buffer_misses = 0
        # DRAM timing parameters (ns).
        self.timing = {
            "tRCD": 15,   # RAS-to-CAS delay
            "tCAS": 15,   # CAS latency
            "tRP": 15,    # row precharge
            "tRAS": 35,   # row active time
            "tRC": 50,    # row cycle time
            "tBurst": 4,  # burst transfer time
        }

    def process_event(self, event, current_time):
        """Serve a READ request; return a FILL event at completion time."""
        new_events = []
        if event.type == "READ":
            addr = event.data["addr"]
            bank_id, row = self.decode_addr(addr)
            bank = self.banks[bank_id]
            # Bug fix: a request can never start before the bank is free.
            # The original honoured busy_until only on the row-miss path,
            # letting row-buffer hits overlap a still-busy bank.
            completion_time = max(current_time, bank["busy_until"])
            if bank["row"] == row:
                # Row-buffer hit: column access only.
                self.row_buffer_hits += 1
                completion_time += self.timing["tCAS"] + self.timing["tBurst"]
            else:
                # Row-buffer miss: precharge + activate + column access.
                self.row_buffer_misses += 1
                completion_time += self.timing["tRP"] + self.timing["tRCD"] + \
                                   self.timing["tCAS"] + self.timing["tBurst"]
                bank["row"] = row  # the new row is now open
            bank["busy_until"] = completion_time
            # Schedule the data-return event back to the requester.
            new_events.append(Event(
                time=completion_time,
                target=event.data["source"],  # reply to the requesting cache
                action="FILL",
                data={"addr": addr, "data": "memory_data",
                      "mshr_id": event.data.get("mshr_id")},
            ))
        return new_events

    def decode_addr(self, addr):
        """Map a physical address to (bank_id, row)."""
        bank_id = (addr >> 10) & 0x7   # 3 bank-select bits
        row = (addr >> 13) & 0x3FFF    # 14 row-address bits
        return bank_id, row
🔷 互联网络 (NoC / 总线)
建模要点:路由算法、链路带宽、缓冲区、仲裁
python
class NetworkOnChip:
    """Sketch of an on-chip interconnect (mesh) using XY routing.

    PACKET_INJECT places a packet at its source router and schedules its
    first hop; PACKET_ARRIVE either delivers the packet at its
    destination or forwards it another hop.
    """

    def __init__(self, topology="mesh", dim_x=4, dim_y=4):
        self.routers = {}
        self.links = {}
        self.setup_topology(topology, dim_x, dim_y)

    def process_event(self, event, current_time):
        """Route one packet event; return the follow-up events."""
        spawned = []
        if event.type == "PACKET_INJECT":
            # A packet enters the network at its source node.
            packet = event.data["packet"]
            src = event.data["src"]
            dst = event.data["dst"]
            # Deterministic XY route from source to destination.
            hops = self.route_xy(src, dst)
            next_hop = hops[1]  # first router after the source
            # Per-hop latency = link traversal + 2-cycle routing decision.
            link_delay = self.links[(src, next_hop)]["delay"]
            router_delay = 2
            spawned.append(Event(
                time=current_time + link_delay + router_delay,
                target=f"Router-{next_hop}",
                action="PACKET_ARRIVE",
                data={"packet": packet, "path": hops[1:], "dst": dst},
            ))
        elif event.type == "PACKET_ARRIVE":
            packet = event.data["packet"]
            here = event.target
            if here == event.data["dst"]:
                # Destination reached: hand the packet up to the
                # attached component.
                spawned.append(Event(
                    time=current_time,
                    target=packet.destination_component,
                    action="NETWORK_RESPONSE",
                    data={"packet": packet},
                ))
            else:
                # Keep forwarding along the remaining path.
                remaining = event.data["path"]
                next_hop = remaining[0]
                # ... handling analogous to PACKET_INJECT
        return spawned
🔷 I/O 设备 / 加速器
建模要点:DMA传输、中断、专用计算单元
python
class DMAController:
    """Event-driven DMA engine with four channels.

    A DMA_REQUEST occupies a free channel, schedules a DMA_COMPLETE
    interrupt for the CPU at the transfer's completion time, and emits
    one memory READ event per 64-byte chunk of the transfer.
    """

    def __init__(self, bandwidth=64):
        # Bug fix: `self.bandwidth` (bytes per time unit) was read in
        # process_event but never initialised; default keeps the old
        # no-argument construction working.
        self.bandwidth = bandwidth
        self.channels = [None] * 4  # 4 DMA channels (None = free)
        self.pending_transfers = []

    def process_event(self, event, current_time):
        """Handle a DMA_REQUEST; return the events it spawns."""
        new_events = []
        if event.type == "DMA_REQUEST":
            # Configure the transfer on a free channel.
            channel = self.allocate_channel()
            transfer = {
                "src": event.data["src_addr"],
                "dst": event.data["dst_addr"],
                "bytes": event.data["size"],
                "start_time": current_time,
                "completion_time": current_time + self.calculate_time(event.data["size"]),
            }
            self.channels[channel] = transfer
            # Completion interrupt delivered to the CPU.
            new_events.append(Event(
                time=transfer["completion_time"],
                target="CPU",
                action="DMA_COMPLETE",
                data={"channel": channel, "callback": event.data["callback"]},
            ))
            # The transfer itself is a stream of memory accesses:
            # one READ per 64-byte chunk, paced by the bandwidth.
            for offset in range(0, event.data["size"], 64):
                new_events.append(Event(
                    time=current_time + offset / self.bandwidth,
                    target="Memory",
                    action="READ",
                    data={"addr": event.data["src_addr"] + offset, "size": 64},
                ))
        return new_events

    def allocate_channel(self):
        """Return the index of the first free channel.

        Raises RuntimeError when all channels are busy.  (This helper was
        called but left undefined in the original sketch.)
        """
        for idx, transfer in enumerate(self.channels):
            if transfer is None:
                return idx
        raise RuntimeError("no free DMA channel")

    def calculate_time(self, size):
        """Time needed to move `size` bytes at the configured bandwidth."""
        return size / self.bandwidth
3. 组件间交互示例
一个完整的Load指令流程:
时间轴 →
CPU: FETCH ─────────────────────────────────────────►
│ (发送给I-Cache)
I-Cache: READ ──────────► HIT ─────────────────────►
(1ns) │ (返回指令)
CPU: EXECUTE ──────────────────►
│ (解析为Load,发送给D-Cache)
D-Cache: READ ─────► MISS ───────────►
(2ns) │ (查询MSHR,发送给Mem)
Memory: READ ───────────────►
(50ns) │ (Bank busy, 行缓冲未命中)
Memory: DATA_READY ─►
D-Cache: FILL ─►
│ (更新缓存,唤醒CPU)
CPU: RESUME ─►
(继续执行)
4. 关键设计模式
| 模式 | 说明 | 应用 |
|---|---|---|
| 分层建模 | 高层组件(CPU)不关心底层细节(DRAM时序) | 模块化、可替换 |
| 延迟参数化 | 所有延迟可配置,支持不同技术节点 | DDR4 vs DDR5 vs HBM |
| 状态机 | 每个组件维护状态(IDLE/BUSY/WAITING) | 精确建模资源冲突 |
| 事件链 | 一个事件触发一系列下游事件 | 追踪完整操作路径 |
| 统计聚合 | 在事件处理中收集性能数据 | 生成模拟报告 |
这种建模方式的核心优势是解耦:每个硬件只需定义"收到什么事件、如何处理、产生什么新事件",调度器负责全局时间推进。这使得添加新硬件类型(如新型加速器)只需实现其事件处理逻辑,无需修改其他组件。