Lua源码剖析(二)虚拟机

一. 简介

  脚本语言,即动态语言,其相较于静态语言如C/C++/GO等,一大区别即不需要经过计算机的预处理、编译、汇编、链接,而是由虚拟机代为完成上述过程。虚拟机模拟真实的步骤,首先将脚本语言转化为特定的opcode(各脚本语言自行定义),接着放在虚拟机中逐个执行,模拟了CPU及内存的基本功能。Lua作为一门嵌入式语言,其虚拟机附着于宿主环境中而非单独存在,其核心问题主要包括:

  • 如何分析源代码文件生成Opcode
  • 如何执行Opcode指令
  • 如何保存整个执行环境

  本文将就以上三方面进行详细剖析,源码版本为Lua 5.3.4。Lua脚本的加载、编译和执行通过luaL_dofile()进行,本文就由此开始展开分析。luaL_dofile()实际是一个宏定义,如下所示:

1
2
3
// lauxlib.h
/*
** Evaluates luaL_loadfile(L, fn) first; because of the '||', lua_pcall is
** only evaluated when luaL_loadfile yields 0. lua_pcall is invoked with
** 0 arguments, LUA_MULTRET as the result count and 0 as the error function.
** The whole expression is non-zero when either call yields non-zero.
*/
#define luaL_dofile(L, fn) \
(luaL_loadfile(L, fn) || lua_pcall(L, 0, LUA_MULTRET, 0))

  主要做了两件事:

  • 调用luaL_loadfile(L, fn)生成opcode
  • 调用lua_pcall()执行

  下面几节就详细展开进行叙述,并补充LuaJIT的实现方式的不同之处。

二. opcode的生成

  从源码生成opcode,需要以下步骤:

  • 初始化Lua虚拟机数据结构
  • 读取Lua脚本文件内容
  • 依次对Lua脚本文件进行词法分析、语法分析、语义分析,最后生成该文件的Lua虚拟机指令。注意以上的过程仅需要一次遍历,这是Lua解释器做得非常好的地方

  luaL_loadfile()最终调用f_parser()函数,对Lua代码进行语法语义分析。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// ldo.c
/*
** Parser entry point; 'ud' is cast to a 'struct SParser'.
** Reads the first character of the input stream 'p->z' and uses it to
** choose between the binary loader (luaU_undump) and the text parser
** (luaY_parser). The resulting Lua closure then has its upvalues
** initialized through luaF_initupvals.
*/
static void f_parser (lua_State *L, void *ud)
{
LClosure *cl;
struct SParser *p = cast(struct SParser *, ud);
int c = zgetc(p->z); /* read first character */
/* input whose first character equals LUA_SIGNATURE[0] is treated as binary */
if (c == LUA_SIGNATURE[0]) {
/* presumably verifies that 'p->mode' permits binary chunks -- confirm in checkmode */
checkmode(L, p->mode, "binary");
cl = luaU_undump(L, p->z, p->name);
}
else {
/* presumably verifies that 'p->mode' permits text chunks -- confirm in checkmode */
checkmode(L, p->mode, "text");
/* 'c', the character already read from the stream, is passed along to the parser */
cl = luaY_parser(L, p->z, &p->buff, &p->dyd, p->name, c);
}
/* the closure's upvalue count must match the count recorded in its prototype */
lua_assert(cl->nupvalues == cl->p->sizeupvalues);
luaF_initupvals(L, cl);
}

  f_parser()先读取输入的第一个字符:若它等于LUA_SIGNATURE[0],则按预编译的二进制chunk处理,调用luaU_undump();否则按文本源码处理,调用luaY_parser()。解析结果保存在LClosure中并压入栈,进入后续的opcode执行过程。

三. opcode的执行

  lua_pcall()对栈中的opcode进行实际的执行。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// lua.h(lua_pcall宏)/ lapi.c(lua_pcallk实现)
/* lua_pcall is lua_pcallk called without a continuation: ctx is 0 and k is NULL */
#define lua_pcall(L,n,r,f) lua_pcallk(L, (n), (r), (f), 0, NULL)

/*
** Calls the function located 'nargs + 1' slots below the stack top and
** returns a status code.
** With no continuation 'k' (or when L->nny > 0) the call goes through
** luaD_pcall with f_call as the protected function; otherwise the
** continuation 'k' and its context 'ctx' are stored in the current CallInfo
** and the function is called directly through luaD_call.
** In both cases adjustresults(L, nresults) runs before returning.
** NOTE: this listing is an excerpt; the '....' line stands for omitted code.
*/
LUA_API int lua_pcallk(lua_State *L, int nargs, int nresults, int errfunc,
lua_KContext ctx, lua_KFunction k)
{
struct CallS c;
int status;
ptrdiff_t func;
/* omitted code; 'func' is not assigned in the visible lines, so presumably it is set here -- confirm against lapi.c */
....
c.func = L->top - (nargs + 1); /* function to be called */
if (k == NULL || L->nny > 0)
{ /* no continuation or no yieldable? */
c.nresults = nresults; /* do a 'conventional' protected call */
/* c.func is passed as a saved stack offset (savestack) rather than a raw pointer */
status = luaD_pcall(L, f_call, &c, savestack(L, c.func), func);
}
else
{ /* prepare continuation (call is already protected by 'resume') */
CallInfo *ci = L->ci;
ci->u.c.k = k; /* save continuation */
ci->u.c.ctx = ctx; /* save context */
/* save information for error recovery */
ci->extra = savestack(L, c.func);
ci->u.c.old_errfunc = L->errfunc;
L->errfunc = func;
setoah(ci->callstatus, L->allowhook); /* save value of 'allowhook' */
ci->callstatus |= CIST_YPCALL; /* function can do error recovery */
luaD_call(L, c.func, nresults); /* do the call */
/* the call came back normally: clear the flag and restore the previous error function */
ci->callstatus &= ~CIST_YPCALL;
L->errfunc = ci->u.c.old_errfunc;
status = LUA_OK; /* if it is here, there were no errors */
}
adjustresults(L, nresults);
lua_unlock(L);
return status;
}

  该函数主要流程为

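  • 根据参数个数nargs计算被调用函数在栈上的位置c.func(即L->top - (nargs + 1))
  • 如果没有提供延续函数(continuation,即k == NULL)或者当前不允许yield(L->nny > 0),则调用luaD_pcall()以保护模式执行;否则先把延续函数k、上下文ctx及错误恢复信息保存到当前CallInfo中,再直接调用luaD_call()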
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// ldo.c
/*
** Passes 'func' and 'u' to luaD_rawrunprotected with 'ef' installed as
** L->errfunc, and returns the resulting status.
** 'old_top' is a saved stack offset (it is converted back with restorestack).
** When the status is not LUA_OK, the saved 'ci', 'allowhook' and 'nny'
** values are put back, seterrorobj is called with the restored stack
** position, and luaD_shrinkstack is called.
** The previous L->errfunc is restored whether or not an error occurred.
*/
int luaD_pcall(lua_State *L, Pfunc func, void *u,
ptrdiff_t old_top, ptrdiff_t ef)
{
int status;
/* snapshot the state fields that are restored on the error path below */
CallInfo *old_ci = L->ci;
lu_byte old_allowhooks = L->allowhook;
unsigned short old_nny = L->nny;
ptrdiff_t old_errfunc = L->errfunc;
L->errfunc = ef;
status = luaD_rawrunprotected(L, func, u);
if (status != LUA_OK)
{ /* an error occurred? */
StkId oldtop = restorestack(L, old_top);
luaF_close(L, oldtop); /* close possible pending closures */
seterrorobj(L, status, oldtop);
L->ci = old_ci;
L->allowhook = old_allowhooks;
L->nny = old_nny;
luaD_shrinkstack(L);
}
L->errfunc = old_errfunc;
return status;
}

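  luaD_pcall()只是通过luaD_rawrunprotected()在保护模式下运行传入的函数(这里是f_call),而f_call最终同样会走到luaD_call()。因此实际需要关注的是luaD_call()函数,其主要调用luaD_precall()进行预处理,若被调用的是Lua函数,再调用luaV_execute()执行。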

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
** Call a function (C or Lua). The function to be called is at *func.
** The arguments are on the stack, right after the function.
** When returns, all the results are on the stack, starting at the original
** function position.
** L->nCcalls is incremented for the duration of the call; stackerror is
** invoked when the incremented value reaches LUAI_MAXCCALLS.
*/
void luaD_call(lua_State *L, StkId func, int nResults)
{
if (++L->nCcalls >= LUAI_MAXCCALLS)
stackerror(L);
/* luaD_precall returns non-zero when it has already run the function, so luaV_execute only runs on a zero result */
if (!luaD_precall(L, func, nResults)) /* is a Lua function? */
luaV_execute(L); /* call it */
L->nCcalls--;
}

  luaD_precall()函数的主要逻辑在注释中已经说明得很清晰了。对于C函数(C闭包或light C function),它直接完成调用,随后通过luaD_poscall()处理返回值并回到上一层级,函数返回1;对于Lua函数,它只负责准备好调用帧并返回0,实际的执行交给调用者通过luaV_execute()完成。其中关键操作包括:

  • 调用next_ci()从lua_State的CallInfo链表中得到一个新的CallInfo结构体,设置它的func/base/top指针
  • 当实参个数少于函数的形参个数(p->numparams)时,调用setnilvalue()把缺少的参数补为nil
  • 将lua_State的top指针设置为CallInfo的top(base保存在ci->u.l.base中,起始指令保存在ci->u.l.savedpc中),供luaV_execute()执行
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/*
** Prepares a function call: checks the stack, creates a new CallInfo
** entry, fills in the relevant information, calls hook if needed.
** If function is a C function, does the call, too. (Otherwise, leave
** the execution ('luaV_execute') to the caller, to allow stackless
** calls.) Returns true iff function has been executed (C function).
** A value that is neither kind of function is retried after tryfuncTM
** (the '__call' metamethod lookup).
*/
int luaD_precall(lua_State *L, StkId func, int nresults)
{
lua_CFunction f;
CallInfo *ci;
switch (ttype(func))
{
case LUA_TCCL: /* C closure */
f = clCvalue(func)->f;
goto Cfunc;
case LUA_TLCF: /* light C function */
f = fvalue(func);
/* both kinds of C function share the call sequence below once 'f' is known */
Cfunc:
{
int n; /* number of returns */
checkstackp(L, LUA_MINSTACK, func); /* ensure minimum stack size */
ci = next_ci(L); /* now 'enter' new function */
ci->nresults = nresults;
ci->func = func;
ci->top = L->top + LUA_MINSTACK;
lua_assert(ci->top <= L->stack_last);
ci->callstatus = 0;
if (L->hookmask & LUA_MASKCALL)
luaD_hook(L, LUA_HOOKCALL, -1);
/* the C function itself runs between lua_unlock and lua_lock */
lua_unlock(L);
n = (*f)(L); /* do the actual call */
lua_lock(L);
api_checknelems(L, n);
/* the first of the 'n' results is at L->top - n */
luaD_poscall(L, ci, L->top - n, n);
return 1;
}
case LUA_TLCL:
{ /* Lua function: prepare its call */
StkId base;
Proto *p = clLvalue(func)->p;
int n = cast_int(L->top - func) - 1; /* number of real arguments */
int fsize = p->maxstacksize; /* frame size */
checkstackp(L, fsize, func);
if (p->is_vararg)
base = adjust_varargs(L, p, n);
else
{ /* non vararg function */
for (; n < p->numparams; n++)
setnilvalue(L->top++); /* complete missing arguments */
base = func + 1;
}
ci = next_ci(L); /* now 'enter' new function */
ci->nresults = nresults;
ci->func = func;
ci->u.l.base = base;
/* the frame occupies 'fsize' slots starting at 'base' */
L->top = ci->top = base + fsize;
lua_assert(ci->top <= L->stack_last);
ci->u.l.savedpc = p->code; /* set the saved PC to the start of the function's code (starting point) */
ci->callstatus = CIST_LUA;
if (L->hookmask & LUA_MASKCALL)
callhook(L, ci);
/* nothing has been executed yet; the caller is expected to run luaV_execute */
return 0;
}
default:
{ /* not a function */
checkstackp(L, 1, func); /* ensure space for metamethod */
tryfuncTM(L, func); /* try to get '__call' metamethod */
return luaD_precall(L, func, nresults); /* now it must be a function */
}
}
}

  luaV_execute()从当前的CallInfo(L->ci,即luaD_precall()中准备好的调用帧)取出Lua闭包、常量表和栈基址,然后在主循环中逐条取出指令,根据指令的opcode类型执行相应的操作。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
** Interpreter loop for the Lua function of the current CallInfo (L->ci).
** Caches the closure, its constant table and the frame base in locals,
** then repeatedly fetches an instruction and dispatches on its opcode.
** NOTE: this listing is an excerpt; the '...' line stands for the
** remaining opcode cases, which are omitted.
*/
void luaV_execute(lua_State *L)
{
CallInfo *ci = L->ci;
LClosure *cl;
TValue *k;
StkId base;
ci->callstatus |= CIST_FRESH; /* fresh invocation of 'luaV_execute' */
newframe: /* reentry point when frame changes (call/return) */
lua_assert(ci == L->ci);
cl = clLvalue(ci->func); /* local reference to function's closure */
k = cl->p->k; /* local reference to function's constant table */
base = ci->u.l.base; /* local copy of function's base */
/* main loop of interpreter */
for (;;)
{
Instruction i;
StkId ra;
/* presumably vmfetch loads the next instruction into 'i' and sets 'ra' -- confirm in the macro definition */
vmfetch();
vmdispatch(GET_OPCODE(i))
{
vmcase(OP_MOVE)
{
/* passes 'ra' and RB(i) to setobjs2s; presumably copies the RB(i) value into 'ra' -- confirm in the macro */
setobjs2s(L, ra, RB(i));
vmbreak;
}
...
}
}
}

四. 上下文环境切换

  

参考文献

  1. Lua-Source-Internal
坚持原创,坚持分享,谢谢鼓励和支持