LuaJit分析(九)LuaJit中的JIT原理分析

Jit in luajit

Luajit是一款高性能的lua解释器,与官方的lua解释器相比,luajit的高速除了将解释器直接以汇编代码实现外,还支持jit模式(Just in time)。Jit模式即将luajit的字节码编译成处理器能够直接执行的机器码,从而比解释执行速度更快。

Luajit存在97个字节码指令,例如 FORL指令对应一个数字类型的for循环语句,同时还有IFORL指令(强制解释模式执行)和JFORL指令(Jit模式执行),同时解释器实现了对各个字节码指令的翻译,这里以X86的翻译器为例。

Luajit以一段指令序列(trace)为单位进行优化:当某条指令的地址被识别为热点(hot)后,开始跟踪记录线性的指令序列,并在退出跟踪时将该指令序列编译成机器码。但是luajit只在FUNCF、FORL、ITERL、LOOP这四个指令处进行热点计数并触发跟踪,即循环和函数的开始处,例如,在解释执行FORL指令:

cpp 复制代码
case BC_FORL:
    |.if JIT
    |  hotloop RB
    |.endif
    | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
break;

它首先判断是否启用了JIT(.if JIT是生成解释器时的条件汇编,而不是运行时判断),如果启用,则展开hotloop宏进行热点判断,同样的,如果是FUNCF指令,则展开hotcall宏:

cpp 复制代码
case BC_FUNCF:
    |.if JIT
    |  hotcall RB
    |.endif
case BC_FUNCV:  /* NYI: compiled vararg functions. */
    | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
break;

hotloop块的定义如下:

cpp 复制代码
|// Decrement hashed hotcount and trigger trace recorder if zero.
|.macro hotloop, reg
|  mov reg, PC                      // Start from the current bytecode PC.
|  shr reg, 1                       // Hash step 1: drop the lowest address bit.
|  and reg, HOTCOUNT_PCMASK         // Hash step 2: mask down to a hotcount table offset.
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP  // Decrement the 16-bit counter for this hash slot.
|  jb ->vm_hotloop                  // Taken when the subtraction borrows, i.e. the counter underflowed.
|.endmacro

它将当前指令的地址右移一位,并与HOTCOUNT_PCMASK做与操作,得到一个索引(哈希运算),根据这个索引在数组中找到计数值,减去HOTCOUNT_LOOP,当减法发生借位(即计数值减到小于0)时,跳转到vm_hotloop继续执行。

cpp 复制代码
|->vm_hotloop:               // Hot loop counter underflow.
|.if JIT
|  mov LFUNC:RB, [BASE-8]           // Same as curr_topL(L).
|  mov RB, LFUNC:RB->pc
|  movzx RD, byte [RB+PC2PROTO(framesize)]
|  lea RD, [BASE+RD*8]
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov L:RB->top, RD
|  mov FCARG2, PC
|  lea FCARG1, [DISPATCH+GG_DISP2J]
|  mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
|  mov SAVE_PC, PC
|  call extern lj_trace_hot@8          // (jit_State *J, const BCIns *pc)
|  jmp <3
|.endif

首先获取当前的函数,并得到字节码PC指针,获取栈帧大小(framesize)并保存到RD中,接着将top的位置计算出来保存到RD中,在进行一些参数设置后,调用lj_trace_hot用于跟踪热点,该函数位于lj_trace.c中:

cpp 复制代码
/* A hotcount triggered. Start recording a root trace. */
/*
** J:  JIT compiler state.
** pc: interpreter PC at the time the counter fired (one instruction past the
**     instruction that triggered it, see the note below).
** The hotcount slot for pc is always re-armed; a new root trace is only
** started when the compiler is idle and no __gc/vmevent hook is running.
*/
void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc)
{
  /* Note: pc is the interpreter bytecode PC here. It's offset by 1. */
  /* NOTE(review): ERRNO_SAVE/ERRNO_RESTORE presumably preserve errno across
  ** this call -- their definitions are not visible here; confirm. */
  ERRNO_SAVE
  /* Reset hotcount. */
  /* The new counter value is the hotloop parameter scaled by HOTCOUNT_LOOP,
  ** matching the HOTCOUNT_LOOP decrement applied per loop iteration. */
  hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]*HOTCOUNT_LOOP);
  /* Only start a new trace if not recording or inside __gc call or vmevent. */
  if (J->state == LJ_TRACE_IDLE &&
      !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
    J->parent = 0;  /* Root trace. */
    J->exitno = 0;
    J->state = LJ_TRACE_START;
    /* Undo the offset: hand the true PC of the hot instruction to the recorder. */
    lj_trace_ins(J, pc-1);
  }
  ERRNO_RESTORE
}

它将状态设置为LJ_TRACE_START后,开始调用lj_trace_ins进行热点跟踪:

cpp 复制代码
/* A bytecode instruction is about to be executed. Record it. */
/*
** J:  JIT compiler state; J->L must already point at the running Lua state.
** pc: true PC of the instruction about to be executed.
** Caches the current PC, function and (for Lua functions) prototype in J,
** then drives the trace_state state machine through lj_vm_cpcall.
*/
void lj_trace_ins(jit_State *J, const BCIns *pc)
{
  /* Note: J->L must already be set. pc is the true bytecode PC here. */
  J->pc = pc;
  J->fn = curr_func(J->L);
  /* Only Lua functions have a prototype; J->pt stays NULL otherwise. */
  J->pt = isluafunc(J->fn) ? funcproto(J->fn) : NULL;
  /* A non-zero result from lj_vm_cpcall switches the state to LJ_TRACE_ERR
  ** and runs trace_state again, so its LJ_TRACE_ERR case handles the failure. */
  while (lj_vm_cpcall(J->L, NULL, (void *)J, trace_state) != 0)
    J->state = LJ_TRACE_ERR;
}

这里的pc指向即将执行的字节码指令,在循环中不断执行和跟踪,这里的跟踪通过trace_state函数实现,它是一个状态机,TraceState枚举中共定义了7个状态值:

cpp 复制代码
/* Trace compiler state. */
/*
** The numeric order is significant: trace_state() keeps looping while
** J->state > LJ_TRACE_RECORD, i.e. for START, END, ASM and ERR.
*/
typedef enum {
  LJ_TRACE_IDLE,  /* Trace compiler idle. */
  LJ_TRACE_ACTIVE = 0x10,  /* Base value; every state below has bit 0x10 set. */
  LJ_TRACE_RECORD,  /* Bytecode recording active. */
  LJ_TRACE_START, /* New trace started. */
  LJ_TRACE_END,   /* End of trace. */
  LJ_TRACE_ASM,   /* Assemble trace. */
  LJ_TRACE_ERR    /* Trace aborted with error. */
} TraceState;

IDLE表示空闲、RECORD表示正在跟踪记录、END表示结束、ASM表示开始编译机器指令,这个状态转换函数的实现如下:

cpp 复制代码
/* State machine for the trace compiler. Protected callback. */
/*
** L:     Lua state the callback runs on.
** dummy: unused.
** ud:    the jit_State, passed as an opaque pointer by lj_trace_ins().
** Dispatches on J->state and keeps looping while the resulting state is
** greater than LJ_TRACE_RECORD (START, END, ASM, ERR). In the RECORD state
** it handles a single instruction and then returns. Always returns NULL.
*/
static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  UNUSED(dummy);
  do {
  retry:
    switch (J->state) {
    /* Begin a new trace: switch to RECORD first, then set the trace up. */
    case LJ_TRACE_START:
      J->state = LJ_TRACE_RECORD;  /* trace_start() may change state. */
      trace_start(J);
      lj_dispatch_update(J2G(J));
      break;
    /* Record the instruction at J->pc, after emitting a RECORD vmevent. */
    case LJ_TRACE_RECORD:
      trace_pendpatch(J, 0);
      setvmstate(J2G(J), RECORD);
      lj_vmevent_send_(L, RECORD,
  /* Save/restore tmptv state for trace recorder. */
  TValue savetv = J2G(J)->tmptv;
  TValue savetv2 = J2G(J)->tmptv2;
  setintV(L->top++, J->cur.traceno);
  setfuncV(L, L->top++, J->fn);
  setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1);
  setintV(L->top++, J->framedepth);
  J2G(J)->tmptv = savetv;
  J2G(J)->tmptv2 = savetv2;
      );
      lj_record_ins(J);
      break;
    /* Recording finished: run the optimization passes, then go to ASM. */
    case LJ_TRACE_END:
      trace_pendpatch(J, 1);
      J->loopref = 0;
      /* Loop optimization only applies when it is enabled, the trace links
      ** back to itself, and the frame/return depths sum to zero. */
      if ((J->flags & JIT_F_OPT_LOOP) &&
    J->cur.link == J->cur.traceno && J->framedepth + J->retdepth == 0) {
  setvmstate(J2G(J), OPT);
  lj_opt_dce(J);
  if (lj_opt_loop(J)) {  /* Loop optimization failed? */
    J->cur.link = 0;
    J->cur.linktype = LJ_TRLINK_NONE;
    J->loopref = J->cur.nins;
    J->state = LJ_TRACE_RECORD;  /* Try to continue recording. */
    break;
  }
  J->loopref = J->chain[IR_LOOP];  /* Needed by assembler. */
      }
      lj_opt_split(J);
      lj_opt_sink(J);
      if (!J->loopref) J->cur.snap[J->cur.nsnap-1].count = SNAPCOUNT_DONE;
      J->state = LJ_TRACE_ASM;
      break;
  
    /* Generate machine code, finalize the trace and return to idle. */
    case LJ_TRACE_ASM:
      setvmstate(J2G(J), ASM);
      lj_asm_trace(J, &J->cur);
      trace_stop(J);
      setvmstate(J2G(J), INTERP);
      J->state = LJ_TRACE_IDLE;
      lj_dispatch_update(J2G(J));
      return NULL;
    default:  /* Trace aborted asynchronously. */
      setintV(L->top++, (int32_t)LJ_TRERR_RECERR);
      /* fallthrough */
    /* Abort handling: a non-zero trace_abort() result restarts the switch. */
    case LJ_TRACE_ERR:
      trace_pendpatch(J, 1);
      if (trace_abort(J))
  goto retry;
      setvmstate(J2G(J), INTERP);
      J->state = LJ_TRACE_IDLE;
      lj_dispatch_update(J2G(J));
      return NULL;
    }
  } while (J->state > LJ_TRACE_RECORD);
  return NULL;
}

它根据不同的状态执行不同的操作函数,我们可以简化为:

cpp 复制代码
/* State machine for the trace compiler. Protected callback. */
/*
** Simplified version of trace_state(): the vmevent emission and the
** optimization passes of the full function are left out so that only the
** state transitions remain. START -> RECORD -> END -> ASM -> IDLE, with
** ERR (or any unknown state) leading to abort handling.
*/
static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  UNUSED(dummy);
  do {
  retry:
    switch (J->state) {
    /* Begin a new trace and switch to recording. */
    case LJ_TRACE_START:
      J->state = LJ_TRACE_RECORD;  /* trace_start() may change state. */
      trace_start(J);
      lj_dispatch_update(J2G(J));
      break;
    /* Record one instruction; the loop condition below then exits
    ** unless lj_record_ins() moved the state past RECORD. */
    case LJ_TRACE_RECORD:
      lj_record_ins(J);
      break;
    /* Recording is complete: hand over to the assembler state. */
    case LJ_TRACE_END:
      trace_pendpatch(J, 1);
      J->state = LJ_TRACE_ASM;
      break;
    /* Generate machine code, finalize the trace and return to idle. */
    case LJ_TRACE_ASM:
      setvmstate(J2G(J), ASM);
      lj_asm_trace(J, &J->cur);
      trace_stop(J);
      setvmstate(J2G(J), INTERP);
      J->state = LJ_TRACE_IDLE;
      lj_dispatch_update(J2G(J));
      return NULL;
    default:  /* Trace aborted asynchronously. */
      setintV(L->top++, (int32_t)LJ_TRERR_RECERR);
      /* fallthrough */
    /* Abort handling: a non-zero trace_abort() result restarts the switch. */
    case LJ_TRACE_ERR:
      trace_pendpatch(J, 1);
      if (trace_abort(J))
  goto retry;
      setvmstate(J2G(J), INTERP);
      J->state = LJ_TRACE_IDLE;
      lj_dispatch_update(J2G(J));
      return NULL;
    }
  } while (J->state > LJ_TRACE_RECORD);
  return NULL;
}

Trace_start用于初始化trace结构,并分配一个traceno(它是trace数组的下标)等。其中比较重要的是lj_record_ins函数,它用于记录一个字节码指令,并保存为SSA形式的中间代码(IR),IR的定义在lj_ir.h中:

cpp 复制代码
/* -- IR instructions ----------------------------------------------------- */
/* IR instruction definition. Order matters, see below. ORDER IR */
/*
** X-macro list: every entry has the form _(name, mode, operand1, operand2).
** A user supplies its own definition of _ when expanding IRDEF(_).
** NOTE(review): the mode letters (N, C, S, L, A, R, W) and the operand kinds
** (ref, lit, cst, ___) are not defined in this excerpt -- presumably
** N=normal, C=commutative, L=load, S=store, A=alloc, R=ref, W=weak guard;
** confirm against the header that defines them.
*/
#define IRDEF(_) \
  /* Guarded assertions. */ \
  /* Must be properly aligned to flip opposites (^1) and (un)ordered (^4). */ \
  _(LT,   N , ref, ref) \
  _(GE,   N , ref, ref) \
  _(LE,   N , ref, ref) \
  _(GT,   N , ref, ref) \
  \
  _(ULT,  N , ref, ref) \
  _(UGE,  N , ref, ref) \
  _(ULE,  N , ref, ref) \
  _(UGT,  N , ref, ref) \
  \
  _(EQ,   C , ref, ref) \
  _(NE,   C , ref, ref) \
  \
  _(ABC,  N , ref, ref) \
  _(RETF, S , ref, ref) \
  \
  /* Miscellaneous ops. */ \
  _(NOP,  N , ___, ___) \
  _(BASE, N , lit, lit) \
  _(PVAL, N , lit, ___) \
  _(GCSTEP, S , ___, ___) \
  _(HIOP, S , ref, ref) \
  _(LOOP, S , ___, ___) \
  _(USE,  S , ref, ___) \
  _(PHI,  S , ref, ref) \
  _(RENAME, S , ref, lit) \
  _(PROF, S , ___, ___) \
  \
  /* Constants. */ \
  _(KPRI, N , ___, ___) \
  _(KINT, N , cst, ___) \
  _(KGC,  N , cst, ___) \
  _(KPTR, N , cst, ___) \
  _(KKPTR,  N , cst, ___) \
  _(KNULL,  N , cst, ___) \
  _(KNUM, N , cst, ___) \
  _(KINT64, N , cst, ___) \
  _(KSLOT,  N , ref, lit) \
  \
  /* Bit ops. */ \
  _(BNOT, N , ref, ___) \
  _(BSWAP,  N , ref, ___) \
  _(BAND, C , ref, ref) \
  _(BOR,  C , ref, ref) \
  _(BXOR, C , ref, ref) \
  _(BSHL, N , ref, ref) \
  _(BSHR, N , ref, ref) \
  _(BSAR, N , ref, ref) \
  _(BROL, N , ref, ref) \
  _(BROR, N , ref, ref) \
  \
  /* Arithmetic ops. ORDER ARITH */ \
  _(ADD,  C , ref, ref) \
  _(SUB,  N , ref, ref) \
  _(MUL,  C , ref, ref) \
  _(DIV,  N , ref, ref) \
  _(MOD,  N , ref, ref) \
  _(POW,  N , ref, ref) \
  _(NEG,  N , ref, ref) \
  \
  _(ABS,  N , ref, ref) \
  _(ATAN2,  N , ref, ref) \
  _(LDEXP,  N , ref, ref) \
  _(MIN,  C , ref, ref) \
  _(MAX,  C , ref, ref) \
  _(FPMATH, N , ref, lit) \
  \
  /* Overflow-checking arithmetic ops. */ \
  _(ADDOV,  CW, ref, ref) \
  _(SUBOV,  NW, ref, ref) \
  _(MULOV,  CW, ref, ref) \
  \
  /* Memory ops. A = array, H = hash, U = upvalue, F = field, S = stack. */ \
  \
  /* Memory references. */ \
  _(AREF, R , ref, ref) \
  _(HREFK,  R , ref, ref) \
  _(HREF, L , ref, ref) \
  _(NEWREF, S , ref, ref) \
  _(UREFO,  LW, ref, lit) \
  _(UREFC,  LW, ref, lit) \
  _(FREF, R , ref, lit) \
  _(STRREF, N , ref, ref) \
  _(LREF, L , ___, ___) \
  \
  /* Loads and Stores. These must be in the same order. */ \
  _(ALOAD,  L , ref, ___) \
  _(HLOAD,  L , ref, ___) \
  _(ULOAD,  L , ref, ___) \
  _(FLOAD,  L , ref, lit) \
  _(XLOAD,  L , ref, lit) \
  _(SLOAD,  L , lit, lit) \
  _(VLOAD,  L , ref, ___) \
  \
  _(ASTORE, S , ref, ref) \
  _(HSTORE, S , ref, ref) \
  _(USTORE, S , ref, ref) \
  _(FSTORE, S , ref, ref) \
  _(XSTORE, S , ref, ref) \
  \
  /* Allocations. */ \
  _(SNEW, N , ref, ref)  /* CSE is ok, not marked as A. */ \
  _(XSNEW,  A , ref, ref) \
  _(TNEW, AW, lit, lit) \
  _(TDUP, AW, ref, ___) \
  _(CNEW, AW, ref, ref) \
  _(CNEWI,  NW, ref, ref)  /* CSE is ok, not marked as A. */ \
  \
  /* Buffer operations. */ \
  _(BUFHDR, L , ref, lit) \
  _(BUFPUT, L , ref, ref) \
  _(BUFSTR, A , ref, ref) \
  \
  /* Barriers. */ \
  _(TBAR, S , ref, ___) \
  _(OBAR, S , ref, ref) \
  _(XBAR, S , ___, ___) \
  \
  /* Type conversions. */ \
  _(CONV, NW, ref, lit) \
  _(TOBIT,  N , ref, ref) \
  _(TOSTR,  N , ref, lit) \
  _(STRTO,  N , ref, ___) \
  \
  /* Calls. */ \
  _(CALLN,  N , ref, lit) \
  _(CALLA,  A , ref, lit) \
  _(CALLL,  L , ref, lit) \
  _(CALLS,  S , ref, lit) \
  _(CALLXS, S , ref, ref) \
  _(CARG, N , ref, ref) \
  \
  /* End of list. */

多种情况都会导致记录结束,例如遇到了已经编译的指令。在LJ_TRACE_ASM状态下会进行代码的编译操作,对应的lj_asm_trace函数位于lj_asm.c中,函数中有一个循环如下:

cpp 复制代码
  /* Assemble a trace in linear backwards order. */
  for (as->curins--; as->curins > as->stopins; as->curins--) {
    IRIns *ir = IR(as->curins);
    lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
    if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
continue;  /* Dead-code elimination can be soooo easy. */
    if (irt_isguard(ir->t))
asm_snap_prep(as);
    RA_DBG_REF();
    checkmclim(as);
    asm_ir(as, ir);
  }

它调用asm_ir将所有的ir指令转换成机器码,在lj_asm_trace函数后,接着调用trace_stop函数结束一个跟踪,该函数实现如下:

cpp 复制代码
/* Stop tracing. */
/*
** Finalizes the current trace (J->cur) after machine code generation:
** patches the bytecode or parent trace so that execution enters the new
** trace, links the trace into the matching chain, commits the machine code,
** saves the trace and emits a "stop" TRACE vmevent.
** The kind of patching depends on the opcode of the trace's start instruction.
*/
static void trace_stop(jit_State *J)
{
  BCIns *pc = mref(J->cur.startpc, BCIns);
  BCOp op = bc_op(J->cur.startins);
  GCproto *pt = &gcref(J->cur.startpt)->pt;
  TraceNo traceno = J->cur.traceno;
  GCtrace *T = J->curfinal;
  lua_State *L;
  switch (op) {
  case BC_FORL:
    /* The FORL jump operand locates the matching FORI instruction. */
    setbc_op(pc+bc_j(J->cur.startins), BC_JFORI);  /* Patch FORI, too. */
    /* fallthrough */
  case BC_LOOP:
  case BC_ITERL:
  case BC_FUNCF:
    /* Patch bytecode of starting instruction in root trace. */
    /* The J* opcode is derived by a fixed offset (BC_JLOOP - BC_LOOP), so
    ** this relies on the relative order of the opcodes. The D operand of
    ** the patched instruction then carries the trace number. */
    setbc_op(pc, (int)op+(int)BC_JLOOP-(int)BC_LOOP);
    setbc_d(pc, traceno);
  addroot:
    /* Add to root trace chain in prototype. */
    J->cur.nextroot = pt->trace;
    pt->trace = (TraceNo1)traceno;
    break;
  case BC_RET:
  case BC_RET0:
  case BC_RET1:
    /* Replace the whole instruction with a JLOOP into the new trace. */
    *pc = BCINS_AD(BC_JLOOP, J->cur.snap[0].nslots, traceno);
    goto addroot;
  case BC_JMP:
    /* Patch exit branch in parent to side trace entry. */
    lua_assert(J->parent != 0 && J->cur.root != 0);
    lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
    /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
    traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE;
    /* Add to side trace chain in root trace. */
    {
      GCtrace *root = traceref(J, J->cur.root);
      root->nchild++;
      J->cur.nextside = root->nextside;
      root->nextside = (TraceNo1)traceno;
    }
    break;
  case BC_CALLM:
  case BC_CALL:
  case BC_ITERC:
    /* Trace stitching: patch link of previous trace. */
    traceref(J, J->exitno)->link = traceno;
    break;
  default:
    /* No other opcode is expected to start a trace. */
    lua_assert(0);
    break;
  }
  
  /* Commit new mcode only after all patching is done. */
  lj_mcode_commit(J, J->cur.mcode);
  J->postproc = LJ_POST_NONE;
  trace_save(J, T);
  
  /* Emit a TRACE vmevent with the arguments "stop", traceno, function. */
  L = J->L;
  lj_vmevent_send(L, TRACE,
    setstrV(L, L->top++, lj_str_newlit(L, "stop"));
    setintV(L->top++, traceno);
    setfuncV(L, L->top++, J->fn);
  );
}

它通过如下两个函数:

cpp 复制代码
setbc_op(pc, (int)op+(int)BC_JLOOP-(int)BC_LOOP);
setbc_d(pc, traceno);

重新设置指令的opcode,即J_op = op + BC_JLOOP - BC_LOOP,这依赖于lj_bc.h中各指令的相对顺序,因此如果将lj_bc.h中的指令随意打乱,会影响到这里的正确性。

修改后的指令为:j_op traceno

同时可以看到pt->trace字段记录的是一个traceno

cpp 复制代码
pt->trace = (TraceNo1)traceno;

那么接下来看解释器中对JFORL的实现:

cpp 复制代码
case BC_JFORI:
  case BC_JFORL:
#if !LJ_HASJIT
    break;
#endif
  case BC_FORI:
  case BC_IFORL:
    vk = (op == BC_IFORL || op == BC_JFORL);
    |  ins_AJ // RA = base, RD = target (after end of loop or start of loop)
    |  lea RA, [BASE+RA*8]
    if (LJ_DUALNUM) {
      |  cmp FOR_TIDX, LJ_TISNUM; jne >9
      if (!vk) {
       |  cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
       |  cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
       |  mov RB, dword FOR_IDX
       |  cmp dword FOR_STEP, 0; jl >5
      } else {
#ifdef LUA_USE_ASSERT
       |  cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
       |  cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
#endif
       |  mov RB, dword FOR_STEP
       |  test RB, RB; js >5
       |  add RB, dword FOR_IDX; jo >1
       |  mov dword FOR_IDX, RB
      }
      |  cmp RB, dword FOR_STOP
      |  mov FOR_TEXT, LJ_TISNUM
      |  mov dword FOR_EXT, RB
      if (op == BC_FORI) {
       |  jle >7
       |1:
       |6:
       |  branchPC RD
      } else if (op == BC_JFORI) {
       |  branchPC RD
       |  movzx RD, PC_RD
       |  jle =>BC_JLOOP
       |1:
       |6:
      } else if (op == BC_IFORL) {
       |  jg >7
       |6:
       |  branchPC RD
       |1:
      } else {
       |  jle =>BC_JLOOP
       |1:
       |6:
      }

当op = JFORL时,跳转到BC_JLOOP,如下:

cpp 复制代码
case BC_JLOOP:
    |.if JIT
    |  ins_AD      // RA = base (ignored), RD = traceno
    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
    |  mov TRACE:RD, [RA+RD*4]
    |  mov RDa, TRACE:RD->mcode
    |  mov L:RB, SAVE_L
    |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
    |  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
    |  // Save additional callee-save registers only used in compiled code.
    |.if X64WIN
    |  mov TMPQ, r12
    |  mov TMPa, r13
    |  mov CSAVE_4, r14
    |  mov CSAVE_3, r15
    |  mov RAa, rsp
    |  sub rsp, 9*16+4*8
    |  movdqa [RAa], xmm6
    |  movdqa [RAa-1*16], xmm7
    |  movdqa [RAa-2*16], xmm8
    |  movdqa [RAa-3*16], xmm9
    |  movdqa [RAa-4*16], xmm10
    |  movdqa [RAa-5*16], xmm11
    |  movdqa [RAa-6*16], xmm12
    |  movdqa [RAa-7*16], xmm13
    |  movdqa [RAa-8*16], xmm14
    |  movdqa [RAa-9*16], xmm15
    |.elif X64
    |  mov TMPQ, r12
    |  mov TMPa, r13
    |  sub rsp, 16
    |.endif
    |  jmp RDa
    |.endif
break;

先根据RD中保存的traceno获取到trace结构,并将trace结构中保存的机器码地址赋值到RDa中,进行堆栈转换后,jmp RDa直接跳转到机器码处执行。

在x86中,当一个字节码执行结束,继续执行下一个字节码时,都会使用ins_NEXT宏,它的定义如下:

cpp 复制代码
|.macro ins_NEXT                    // Fetch, decode and dispatch the next bytecode instruction.
|  mov RC, [PC]                     // Load the instruction word at PC.
|  movzx RA, RCH                    // NOTE(review): RCH is presumably bits 8-15 of RC (operand A) -- confirm.
|  movzx OP, RCL                    // NOTE(review): RCL is presumably the low byte of RC (the opcode) -- confirm.
|  add PC, 4                        // Advance PC by one 4-byte instruction.
|  shr RC, 16                       // Keep only the upper 16 bits of the instruction word in RC.
|.if X64
|  jmp aword [DISPATCH+OP*8]        // Indirect jump through the dispatch table (8-byte entries).
|.else
|  jmp aword [DISPATCH+OP*4]        // Indirect jump through the dispatch table (4-byte entries).
|.endif
|.endmacro

它从PC指向的字节码中获取了opcode,并跳转到DISPATCH + OP*4(X64下为OP*8)处保存的地址执行,可以看出OP实质上是分发表数组的下标。在跟踪记录状态下,这些数组元素都指向了vm_record汇编块:

cpp 复制代码
|->vm_record:                        // Dispatch target for recording phase.
 |.if JIT
 |  movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
 |  test RDL, HOOK_VMEVENT       // No recording while in vmevent.
 |  jnz >5
 |  // Decrement the hookcount for consistency, but always do the call.
 |  test RDL, HOOK_ACTIVE
 |  jnz >1
 |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
 |  jz >1
 |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
 |  jmp >1
 |.endif
 |
 |->vm_rethook:               // Dispatch target for return hooks.
 |  movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
 |  test RDL, HOOK_ACTIVE            // Hook already active?
 |  jnz >5
 |  jmp >1
 |
 |->vm_inshook:               // Dispatch target for instr/line hooks.
 |  movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
 |  test RDL, HOOK_ACTIVE            // Hook already active?
 |  jnz >5
 |
 |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
 |  jz >5
 |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
 |  jz >1
 |  test RDL, LUA_MASKLINE
 |  jz >5
 |1:
 |  mov L:RB, SAVE_L
 |  mov L:RB->base, BASE
 |  mov FCARG2, PC               // Caveat: FCARG2 == BASE
 |  mov FCARG1, L:RB
 |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
 |  call extern lj_dispatch_ins@8      // (lua_State *L, const BCIns *pc)
 |3:
 |  mov BASE, L:RB->base
 |4:
 |  movzx RA, PC_RA
 |5:
 |  movzx OP, PC_OP
 |  movzx RD, PC_RD
 |.if X64
 |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]   // Re-dispatch to static ins.
 |.else
 |  jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]   // Re-dispatch to static ins.
 |.endif

调用lj_dispatch_ins后,最终跳转到DISPATCH+OP*4+GG_DISP2STATIC(X64下为OP*8)处保存的地址继续执行,这个地址正是每个opcode对应的解释器汇编块。

Jit的正常运行还涉及堆栈状态的转换、jit模式到解释模式的跳转等(SSA守护代码),远不止这些。

相关推荐
前行的小黑炭12 分钟前
设计模式:为什么使用模板设计模式(不相同的步骤进行抽取,使用不同的子类实现)减少重复代码,让代码更好维护。
android·java·kotlin
Java技术小馆18 分钟前
如何设计一个本地缓存
java·面试·架构
XuanXu1 小时前
Java AQS原理以及应用
java
风象南4 小时前
SpringBoot中6种自定义starter开发方法
java·spring boot·后端
mghio13 小时前
Dubbo 中的集群容错
java·微服务·dubbo
咖啡教室18 小时前
java日常开发笔记和开发问题记录
java
咖啡教室18 小时前
java练习项目记录笔记
java
鱼樱前端18 小时前
maven的基础安装和使用--mac/window版本
java·后端
RainbowSea19 小时前
6. RabbitMQ 死信队列的详细操作编写
java·消息队列·rabbitmq