Comments (4)
_st_iterate_threads
分析如何迭代所有的coroutines。
_ST_THREADQ
这个是所有线程的列表,迭代所有的coroutines当然是要迭代这个双向链表,它会在创建线程时把线程加到链表中。
_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, int joinable, int stk_size)
{
#ifdef DEBUG
_ST_ADD_THREADQ(thread);
#endif
这个ADD宏定义,详细实现如下图,简单来说就是把thread添加到双向链表_st_this_vp.thread_q
中:
当我们迭代线程链表时,我们需要根据thread的成员tlink,来获取thread的指针:
/*
 * Convert a pointer to a thread's embedded `tlink` list node (_qp) back into
 * a pointer to the enclosing _st_thread_t, by subtracting the byte offset of
 * the `tlink` member from the node address.
 */
#define _ST_THREAD_THREADQ_PTR(_qp) \
((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, tlink)))
struct _st_thread {
_st_clist_t tlink; /* For putting on thread queue */
// https://stackoverflow.com/questions/7897877/how-does-the-c-offsetof-macro-work
#define offsetof(a,b) ((int)(&(((a*)(0))->b)))
(gdb) p ((int)(&(((_st_thread_t*)(0))->tlink)))
$38 = 72
// 展开上面的宏定义,可以获取到所有的coroutine的入口
(gdb) p *(_st_thread_t*)(((char*)_st_this_vp.thread_q.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))
$46 = {state = 1, flags = 2, start = 0x5a565e <_st_idle_thread_start>, arg = 0x0, retval = 0x0, stack = 0xc2e190, links = {
next = 0x9d7ab0 <_st_this_vp+16>, prev = 0x9d7ab0 <_st_this_vp+16>}, wait_links = {next = 0x0, prev = 0x0}, tlink = {
next = 0xc2e218, prev = 0x9d7ae0 <_st_this_vp+64>}, due = 0, left = 0x0, right = 0x0, heap_index = 0,
private_data = 0x7f4444cf2f80, term = 0x0, context = {{__jmpbuf = {4096, 139931188931920, 12653800, 12654984, 0, 0,
139931188931888, 5920469}, __mask_was_saved = 0, __saved_mask = {__val = {0 <repeats 16 times>}}}}}
coroutine RSP
上面我们可以拿到每个thread的jmp_buf,也就是保存的堆栈信息。我们可以拿到RSP地址:
(gdb) info frame
Stack level 1, frame at 0x7fffffffe560:
called by frame at 0x7fffffffe580, caller of frame at 0x7fffffffe530
(gdb) p $rsp
$14 = (void *) 0x7fffffffe530
#elif defined(__amd64__) || defined(__x86_64__)
#define JB_RBX 0
#define JB_RBP 1
#define JB_R12 2
#define JB_R13 3
#define JB_R14 4
#define JB_R15 5
#define JB_RSP 6
#define JB_PC 7
(gdb) p/x (*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))).context
$49 = {{__jmpbuf = {0x1000, 0x7f4444cf2d50, 0xc114e8, 0xc11988, 0x0, 0x0, 0x7f4444cf2d30, 0x5a56d5}, __mask_was_saved = 0x0,
__saved_mask = {__val = {0x0 <repeats 16 times>}}}}
倒数第二个就是RSP的值了,注意第一个是idle coroutine,第二个是一个原生coroutine也就是main(它的start是0x00):
/*
* Initialize primordial thread
*/
thread = (_st_thread_t *) calloc(1, sizeof(_st_thread_t) + (ST_KEYS_MAX * sizeof(void *)));
if (!thread)
return -1;
thread->private_data = (void **) (thread + 1);
thread->state = _ST_ST_RUNNING;
thread->flags = _ST_FL_PRIMORDIAL;
_ST_SET_CURRENT_THREAD(thread);
_st_active_count++;
#ifdef DEBUG
_ST_ADD_THREADQ(thread);
#endif
(gdb) p (*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink)))))
$7 = {state = 5, flags = 5, start = 0x0, arg = 0x0, retval = 0x0, stack = 0x0, links = {next = 0x9d7ab0 <_st_this_vp+16>,
prev = 0x9d7ab0 <_st_this_vp+16>}, wait_links = {next = 0x0, prev = 0x0}, tlink = {next = 0x7f4444ce0e78, prev = 0x7f4444cf2e78},
due = 1603639789498746, left = 0x7f43ba533e30, right = 0x7f43b493be30, heap_index = 44, private_data = 0xc2e320, term = 0x0, context = {{
__jmpbuf = {12646464, 140721635898384, 12653800, 12654984, 0, 0, 140721635898352, 5925227}, __mask_was_saved = 0, __saved_mask = {__val = {
0 <repeats 16 times>}}}}}
堆栈分析看后面的注释。
iterator coroutines
当能拿到coroutine的context时,就可以直接跳到这个coroutine执行:
/*
 * Debug helper that visits every thread on the _ST_THREADQ list and calls
 * _st_show_thread_stack() for each one.
 *
 * The function is re-entered once per visited thread: it long-jumps into a
 * thread's saved context, and that thread's resume path calls back in here.
 * The static variables below carry the iteration state across those
 * re-entries.
 */
void _st_iterate_threads(void)
{
/* Thread currently being visited; NULL when no iteration is in progress. */
static _st_thread_t *thread = NULL;
/* orig_jb: where the iteration was started from (jump target when done).
 * save_jb: pristine copy of the visited thread's context, taken before
 * jumping into it, so it can be put back afterwards. */
static jmp_buf orig_jb, save_jb;
_st_clist_t *q;
/* Flag is off: nothing to do, unless an iteration was in flight (thread is
 * still set). In that case restore the visited thread's context and jump
 * back to the point where the iteration began. */
if (!_st_iterate_threads_flag) {
if (thread) {
memcpy(thread->context, save_jb, sizeof(jmp_buf));
MD_LONGJMP(orig_jb, 1);
}
return;
}
if (thread) {
/* Re-entered after jumping into `thread`: put its saved context back
 * and show its stack. */
memcpy(thread->context, save_jb, sizeof(jmp_buf));
_st_show_thread_stack(thread, NULL);
} else {
/* First entry: record the starting point. The non-zero branch is taken
 * only when one of the MD_LONGJMP(orig_jb, 1) calls in this function
 * jumps back here, which ends the iteration. */
if (MD_SETJMP(orig_jb)) {
_st_iterate_threads_flag = 0;
thread = NULL;
_st_show_thread_stack(thread, "Iteration completed");
return;
}
/* Direct (zero) return from MD_SETJMP: begin with the current thread. */
thread = _ST_CURRENT_THREAD();
_st_show_thread_stack(thread, "Iteration started");
}
/* Advance to the next node, stepping over the list head itself, which is
 * not embedded in any thread. */
q = thread->tlink.next;
if (q == &_ST_THREADQ)
q = q->next;
ST_ASSERT(q != &_ST_THREADQ);
thread = _ST_THREAD_THREADQ_PTR(q);
/* Wrapped around to the current thread: finish via orig_jb. */
if (thread == _ST_CURRENT_THREAD())
MD_LONGJMP(orig_jb, 1);
/* Keep a copy of the next thread's context, then jump into it. */
memcpy(save_jb, thread->context, sizeof(jmp_buf));
MD_LONGJMP(thread->context, 1);
}
注意这个函数,使用的是裸MD_LONGJMP和MD_SETJMP,它的执行顺序很不直观,需要逐步分析:
// 实际上这个setjmp第一次执行是返回0的。
// 而longjmp回来到这里时返回的是1。
if (MD_SETJMP(orig_jb)) {
// 这里意味着是longjmp(orig_jb),迭代所有coroutine结束了。
_st_iterate_threads_flag = 0;
thread = NULL;
_st_show_thread_stack(thread, "Iteration completed");
return;
}
// 这里是setjmp结束,也就是开始迭代,thread保存为当前的coroutine,从它开始迭代。
thread = _ST_CURRENT_THREAD();
_st_show_thread_stack(thread, "Iteration started");
Remark:这就是longjmp(ctx, val)第二个参数的原因,实际上这个val是给setjmp用的。
Remark: 只有打开汇编模式,用si执行,才能看到longjmp到setjmp的这个地方来。
后面就开始迭代各个coroutine执行了:
// 这个就是迭代所有的coroutine的链表,如前面的分析。
q = thread->tlink.next;
if (q == &_ST_THREADQ)
q = q->next;
ST_ASSERT(q != &_ST_THREADQ);
thread = _ST_THREAD_THREADQ_PTR(q);
// 如果是开始迭代的thread,并不是跳到thread的context,而是到这个迭代函数的context。
if (thread == _ST_CURRENT_THREAD())
MD_LONGJMP(orig_jb, 1); // 到上面的返回的地方了。
// 把coroutine的context保存一份,后面有用。
memcpy(save_jb, thread->context, sizeof(jmp_buf));
MD_LONGJMP(thread->context, 1);
注意下面代码实际上是迭代开始之后才会执行,而开始迭代并不会执行,从执行顺序看正好是反的:
// 还原thread的context,这时候是跳到了coroutine的堆栈,
// 但是执行时还是会再次进入这个迭代,所以还原后就继续切下一个线程了,
// 就相当于进了下coroutine的堆栈,然后又切走了。
if (thread) {
memcpy(thread->context, save_jb, sizeof(jmp_buf));
_st_show_thread_stack(thread, NULL);
// 比如如果有个coroutine是调用了sleep而切走的,
// 那么它堆栈就是在_ST_SWITCH_CONTEXT(me);这个地方,
// 但这个是一段代码,从迭代切到这里后,又立刻回到了迭代函数中去了。
int st_usleep(st_utime_t usecs)
{
_st_thread_t *me = _ST_CURRENT_THREAD();
if (me->flags & _ST_FL_INTERRUPT) {
me->flags &= ~_ST_FL_INTERRUPT;
errno = EINTR;
return -1;
}
if (usecs != ST_UTIME_NO_TIMEOUT) {
me->state = _ST_ST_SLEEPING;
_ST_ADD_SLEEPQ(me, usecs);
} else
me->state = _ST_ST_SUSPENDED;
_ST_SWITCH_CONTEXT(me);
上面的SWITCH的宏实际上是一段代码:
ST_SWITCH_OUT_CB(_thread); \
if (!MD_SETJMP((_thread)->context)) { \
_st_vp_schedule(); \
} \
ST_DEBUG_ITERATE_THREADS(); \
同样的这个setjmp在sleep第一次调用时为0,所以进入schedule切走。
而被迭代函数longjmp回来时,又再次进入了迭代函数,再恢复了这个coroutine的context。
所以下次再执行时这个还是sleep的地方,并不会对coroutine有什么影响。
最后一段代码,是如果中途改变了flag,则直接恢复thread的context,并结束迭代:
// _st_iterate_threads_flag=0, thread != NULL,只有手动设置flag=0才会实现,
// 正常逻辑肯定是flag=0时,thread肯定NULL了。
if (!_st_iterate_threads_flag) {
if (thread) {
memcpy(thread->context, save_jb, sizeof(jmp_buf));
MD_LONGJMP(orig_jb, 1);
}
return;
}
这个调用链条如下图所示:
这样也能看懂调度的逻辑:
GDB scripts
根据上面的分析,我们可以写一个GDB脚本gdb/srs.py,可以看当前或者coredump中有多少个coroutines:
(gdb) source gdb/srs.py
(gdb) nn_coroutines
this coroutine(&_st_this_thread->tlink) is: 0x7f43ba761e78
next is 0x7f43b92d9e78, total 500
next is 0x7f43b5c37e78, total 1000
next is 0x7f43bfd71e78, total 31500
next is 0x7f43bdad9e78, total 32000
next is 0x7f43bd8f3e78, total 32500
total coroutines: 32717
从上面可以看出,一共有3.2万个coroutine,如果使用mmap会出问题,可以用这个程序验证。参考SRS #509。
Conclusion
- _st_this_vp.thread_q 中有所有的coroutine,可以手动获取它们的堆栈信息。
- _st_iterate_threads 迭代所有的coroutine,它本身直接longjmp到coroutine,获取堆栈后又再进入它还原coroutine。
- setjmp第一次调用返回0,而longjmp到setjmp的地方时返回值是1,只有通过 si 汇编单步调试才能看到。
from state-threads.
_st_stack_new
coroutine的堆栈,是在调用函数st_thread_create时创建的,创建后顶部和尾部4k的区间mprotect设置为REDZONE,访问这个区间的内存时会直接报错,也就是堆栈消耗完了:
_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, int joinable, int stk_size)
{
stack = _st_stack_new(stk_size);
_st_stack_t *_st_stack_new(int stack_size)
{
ts->vaddr = _st_new_stk_segment(ts->vaddr_size);
#ifdef DEBUG
mprotect(ts->vaddr, REDZONE, PROT_NONE);
mprotect(ts->stk_top + extra, REDZONE, PROT_NONE);
#endif
可以通过mmap看到分配的内存块:
[root@05ff04a933cd trunk]# cat /proc/5904/maps
7ffa1f4df000-7ffa1f4e0000 ---p 00000000 00:00 0 // 这个是不可访问的REDZONE,栈底4K内存。
7ffa1f4e0000-7ffa1f4f0000 rwxp 00000000 00:00 0 // 这个是coroutine的64KB堆栈。
7ffa1f4f0000-7ffa1f4f2000 ---p 00000000 00:00 0 // 这个是不可访问的REDZONE,栈顶4K内存。
但是在core中是看不到这块mmap的内存的:
(gdb) info proc m
Mapped address spaces:
Start Addr End Addr Size Offset objfile
0x7ffa1f2ee000 0x7ffa1f310000 0x22000 0x0 /usr/lib64/ld-2.17.so
0x7ffa1f50f000 0x7ffa1f510000 0x1000 0x21000 /usr/lib64/ld-2.17.so
7ffa1f2ee000-7ffa1f310000 r-xp 00000000 fe:01 1969247 /usr/lib64/ld-2.17.so // 这个有
7ffa1f4df000-7ffa1f4e0000 ---p 00000000 00:00 0 // 这个没有
7ffa1f4e0000-7ffa1f4f0000 rwxp 00000000 00:00 0 // 这个没有
7ffa1f4f0000-7ffa1f4f2000 ---p 00000000 00:00 0 // 这个没有
7ffa1f50f000-7ffa1f510000 r-xp 00021000 fe:01 1969247 /usr/lib64/ld-2.17.so // 这个有
内存分配后,会以下图方式布局,thread对象实际上是从里面分配的:
而实际分配内存是下面的函数,它可能从heap或者mmap分配:
static char *_st_new_stk_segment(int size)
{
#ifdef MALLOC_STACK
void *vaddr = malloc(size);
#else
vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, mmap_flags, zero_fd, 0);
#endif
mmap时,在coredump文件中,vaddr地址是不可访问,而在gdb运行时却不是:
// GDB调试时,可以看到vaddr是0x7f6f114dc000
[root@05ff04a933cd research]# gdb -p 2087
(gdb) p *(*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink)))))->stack
$1 = {links = {next = 0x0, prev = 0x0}, vaddr = 0x7f6f114dc000 "", vaddr_size = 73728, stk_size = 65536, stk_bottom = 0x7f6f114dd000 "",
stk_top = 0x7f6f114ed000 "", sp = 0x7f6f114ecd80}
// 生成core文件
[root@05ff04a933cd research]# kill -11 2087
// 这里vaddr和top都是不能访问,而bottom和sp没问题。
[root@05ff04a933cd trunk]# gdb objs/srs -c core
(gdb) p *(*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink)))))->stack
$1 = {links = {next = 0x0, prev = 0x0}, vaddr = 0x7f6f114dc000 <Address 0x7f6f114dc000 out of bounds>, vaddr_size = 73728, stk_size = 65536,
stk_bottom = 0x7f6f114dd000 "", stk_top = 0x7f6f114ed000 "", sp = 0x7f6f114ecd80}
实际上mmap的堆栈是不释放的,所以这个地方并不是内存破坏,而是就是这么显示的。
vaddr = 0x7f6f114dc000 <Address 0x7f6f114dc000 out of bounds>,
vaddr_size = 73728,
stk_size = 65536,
stk_bottom = 0x7f6f114dd000 "",
stk_top = 0x7f6f114ed000 "",
sp = 0x7f6f114ecd80
coroutine stack
根据rsp和stack sp,就可以知道coroutine的堆栈信息,把堆栈打出来就可以看到调用过程了,比如第三个coroutine:
// RSP是倒数第二个:0x7f4444ce0b90
(gdb) p/x (*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))).context.__jmpbuf
$16 = {0x1000, 0x7f4444ce0c00, 0x7ffc4f1eb360, 0xc11988, 0x0, 0x0, 0x7f4444ce0b90, 0x5a5287}
// SP就是coroutine的堆栈开始点。
(gdb) p/x *(*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))).stack
$19 = {links = {next = 0x0, prev = 0x0}, vaddr = 0x7f4444cd0000, vaddr_size = 0x12000, stk_size = 0x10000, stk_bottom = 0x7f4444cd1000,
stk_top = 0x7f4444ce1000, sp = 0x7f4444ce0d80}
// 堆栈大小是496字节,也就是62个指针
(gdb) p 0x7f4444ce0d80-0x7f4444ce0b90
$20 = 496
// 可以看到这个是个listener,在accept时切走的
(gdb) x/62xa 0x7f4444ce0b90
0x7f4444ce0b90: 0x8e597a00 0xffffffffffffffff
0x7f4444ce0ba0: 0x1b3c4be30 0x7f4444ce0c30
0x7f4444ce0bb0: 0x7f43b3c4ac70 0x7f43b3cb79e0
0x7f4444ce0bc0: 0x7f4444ce0e30 0x7f4444ce0c30
0x7f4444ce0bd0: 0x100000001 0x4f9c54 <SrsSTCoroutine::start()+224>
0x7f4444ce0be0: 0x7f4444ce0e30 0x7f4444ce0c38
0x7f4444ce0bf0: 0x7f4444ce0c10 0x0
0x7f4444ce0c00: 0x7f4444ce0c40 0x5a776c <st_netfd_poll+62>
0x7f4444ce0c10: 0x7f4444ce0c40 0xffffffffffffffff
0x7f4444ce0c20: 0x144ce0c40 0xc2e450
0x7f4444ce0c30: 0x100000009 0x0
0x7f4444ce0c40: 0x7f4444ce0c90 0x5a783d <st_accept+83>
0x7f4444ce0c50: 0xffffffffffffffff 0x0
0x7f4444ce0c60: 0x0 0xc2e450
0x7f4444ce0c70: 0x947c3e18 0x0
0x7f4444ce0c80: 0x7f4444ce0cc0 0xffffffff004b8ba9
0x7f4444ce0c90: 0x7f4444ce0cc0 0x4aaa6c <srs_accept(void*, sockaddr*, int*, long)+48>
0x7f4444ce0ca0: 0xffffffffffffffff 0x0
0x7f4444ce0cb0: 0x0 0xc2e450
0x7f4444ce0cc0: 0x7f4444ce0cf0 0x589da0 <SrsTcpListener::cycle()+148>
0x7f4444ce0cd0: 0x0 0xc2e500
0x7f4444ce0ce0: 0xc786e0 0x0
0x7f4444ce0cf0: 0x7f4444ce0d20 0x4f9f96 <SrsSTCoroutine::cycle()+142>
0x7f4444ce0d00: 0x0 0xc2e5e0
0x7f4444ce0d10: 0x0 0x0
0x7f4444ce0d20: 0x7f4444ce0d50 0x4fa00b <SrsSTCoroutine::pfn(void*)+43>
0x7f4444ce0d30: 0x0 0xc2e5e0
0x7f4444ce0d40: 0x0 0xc2e5e0
0x7f4444ce0d50: 0x7f4444ce0d70 0x5a5a92 <_st_thread_main+40>
0x7f4444ce0d60: 0x0 0x7f4444ce0e30
0x7f4444ce0d70: 0x7ffc4f1eb2f0 0x5a6202 <st_thread_create+306>
// 对应代码如下:
srs_error_t SrsTcpListener::cycle()
{
while (true) {
srs_netfd_t fd = srs_accept(lfd, NULL, NULL, SRS_UTIME_NO_TIMEOUT);
coroutine frame
frame地址可以由jmpbuf算出:
Frame Address = RBP + 16 = jmpbuf[1] + 16
下面详细分析setjmp的情况。
coroutine在调度时,是调用的宏定义_ST_SWITCH_CONTEXT
,也就是切到其他的coroutine:
int st_usleep(st_utime_t usecs)
{
_st_thread_t *me = _ST_CURRENT_THREAD();
me->state = _ST_ST_SLEEPING;
_ST_ADD_SLEEPQ(me, usecs);
_ST_SWITCH_CONTEXT(me);
这个宏定义展开如下:
#define _ST_SWITCH_CONTEXT(_thread) \
if (!MD_SETJMP((_thread)->context)) { \
_st_vp_schedule(); \
} \
实际上就是setjmp。用一个最简单的调用例子,来看ST的寄存器变化,以及RBP和frame的关系:
/*
 * Minimal demo program used for the register/frame walkthrough that follows:
 * initialize ST, then call st_usleep() in an endless loop.
 */
int main(int argc, char** argv) {
st_init();
for (;;) {
/* 1000 * 1000 microseconds per call (st_usleep takes usecs). */
st_usleep(1000 * 1000);
}
/* Not reached: the loop above never exits. */
return 0;
}
// 设置断点在st_usleep的入口
B+>│0x4028a2 <st_usleep> push %rbp
│0x4028a3 <st_usleep+1> mov %rsp,%rbp
│0x4028a6 <st_usleep+4> sub $0x20,%rsp
// 这时候RBP还是上一个函数(main)的堆栈开始地址。
(gdb) p $rbp
$34 = (void *) 0x7fffffffe570
// 而fp是函数的堆栈开始的地方,fp到frame地址中间有16字节两个指针,
// 分别是main的rip和rbp,也就是从这个函数的stack中可以找到调用者的信息。
(gdb) p $fp
$33 = (void *) 0x7fffffffe550
(gdb) x/2xa $fp
0x7fffffffe550: 0x7fffffffe570 0x40113b <main(int, char**)+30>
// frame地址就是fp+16字节
(gdb) info frame
Stack level 0, frame at 0x7fffffffe560:
(gdb) p $fp
$36 = (void *) 0x7fffffffe550
当然fp这个寄存器我们并没有保存,所以我们继续执行三条汇编,看下RBP和frame关系:
B+│0x4028a2 <st_usleep> push %rbp // 保存RBP,上个函数的RBP
│0x4028a3 <st_usleep+1> mov %rsp,%rbp // 将RSP赋值给RBP,现在RBP就是这个函数的RBP了
│0x4028a6 <st_usleep+4> sub $0x20,%rsp // 堆栈下移0x20字节(也就是4个指针),这是为局部变量和参数副本预留的空间,大小并不固定
>│0x4028aa <st_usleep+8> mov %rdi,-0x18(%rbp) // 从这里开始初始化函数的输入参数,rdi就是第一个参数。
// RBP就是FP了,不过FP是调用函数时设置的,而RBP是函数的入口的两条汇编设置的。
(gdb) p $rbp
$37 = (void *) 0x7fffffffe550
(gdb) p $fp
$38 = (void *) 0x7fffffffe550
(gdb) info frame
Stack level 0, frame at 0x7fffffffe560:
Notes: Linux x86_64传参方法是:第一个参数rdi,第二个rsi,第三个是rdx,第四个是rcx,第五个是r8,第六个是r9,再往后就在rsp堆栈往上存储。 返回值是rax。
从上面可以看到,如果我们知道了RBP,就可以知道Frame的地址了:
Frame Address = RBP + 16 // Linux x86_64,其他平台需要调试看看。
我们继续执行到st_usleep开始切换上下文的地方:
int st_usleep(st_utime_t usecs)
{
_st_thread_t *me = _ST_CURRENT_THREAD();
me->state = _ST_ST_SLEEPING;
_ST_ADD_SLEEPQ(me, usecs);
_ST_SWITCH_CONTEXT(me);
(gdb) f
#0 st_usleep (usecs=1000000) at sync.c:131
131 _ST_SWITCH_CONTEXT(me);
// 可以看到RBP和FP不会变化,是这个函数的堆栈开始的点,
// 而RSP堆栈指针会随着局部变量和返回值而变化。
(gdb) p $rsp
$39 = (void *) 0x7fffffffe530
(gdb) p $rbp
$40 = (void *) 0x7fffffffe550
(gdb) p $fp
$43 = (void *) 0x7fffffffe550
(gdb) info frame
Stack level 1, frame at 0x7fffffffe560:
// 下面要单步指令执行si,进入到setjmp函数的汇编实现 _st_md_cxt_save
0x402959 <st_usleep+183> callq 0x4070c0 <_st_md_cxt_save>
0x4070c0 <_st_md_cxt_save> mov %rbx,(%rdi)
// 进入函数_st_md_cxt_save后,可以看到RBP还是上个函数的RBP,而FP是由callq自动设置了。
// callq还自动把上个函数的rip,保存到了堆栈,RSP也降低了一个指针。
(gdb) p $fp
$44 = (void *) 0x7fffffffe520
(gdb) p $rbp
$45 = (void *) 0x7fffffffe550
(gdb) p $rsp
$46 = (void *) 0x7fffffffe528
(gdb) info frame
Stack level 0, frame at 0x7fffffffe530:
// 如果查看堆栈的信息,可以看到上个函数的rip在当前RSP位置:
(gdb) x /1xa $rsp
0x7fffffffe528: 0x40295e <st_usleep+188>
总结下,在setjmp/_st_md_cxt_save这个函数中几个关键寄存器的保存:
- RBX, RBP, R12~R15,直接保存到env(地址在rdi中),对应jmpbuf的0到5个元素。
- RSP,由于callq自动把前个函数的rip保存到了堆栈,所以我们+8后就是上个函数的RSP。
- PC,我们取RSP地址的内容,就是上个函数的RIP也就是PC了。
不过我们知道了frame地址,也无法直接切换过去,比如下面两个coroutine:
(gdb) info frame
Stack level 1, frame at 0x7ffff7fe9d60:
rip = 0x4016d8 in _st_idle_thread_start (sched.c:231); saved rip 0x401a87
called by frame at 0x7ffff7fe9d80, caller of frame at 0x7ffff7fe9d30
source language c.
Arglist at 0x7ffff7fe9d50, args: arg=0x0
Locals at 0x7ffff7fe9d50, Previous frame's sp is 0x7ffff7fe9d60
Saved registers:
rbp at 0x7ffff7fe9d50, rip at 0x7ffff7fe9d58
(gdb) p/x _st_this_thread->context
$73 = {{__jmpbuf = {0x1000, 0x7ffff7fe9d50, 0x401030, 0x7fffffffe650, 0x0, 0x0, 0x7ffff7fe9d30, 0x4016ca}, __mask_was_saved = 0x0, __saved_mask = {
__val = {0x0 <repeats 16 times>}}}}
(gdb) f
#1 0x00000000004016d8 in _st_idle_thread_start (arg=0x0) at sched.c:231
231 _ST_SWITCH_CONTEXT(me);
(gdb) bt
#0 _st_iterate_threads () at sched.c:672
#1 0x00000000004016d8 in _st_idle_thread_start (arg=0x0) at sched.c:231
#2 0x0000000000401a87 in _st_thread_main () at sched.c:337
(gdb) info frame
Stack level 1, frame at 0x7fffffffe560:
rip = 0x40296c in st_usleep (sync.c:131); saved rip 0x40113b
called by frame at 0x7fffffffe580, caller of frame at 0x7fffffffe530
source language c.
Arglist at 0x7fffffffe550, args: usecs=1000000
Locals at 0x7fffffffe550, Previous frame's sp is 0x7fffffffe560
Saved registers:
rbp at 0x7fffffffe550, rip at 0x7fffffffe558
(gdb) p/x _st_this_thread->context
$74 = {{__jmpbuf = {0x0, 0x7fffffffe550, 0x401030, 0x7fffffffe650, 0x0, 0x0, 0x7fffffffe530, 0x40295e}, __mask_was_saved = 0x0, __saved_mask = {
__val = {0x0 <repeats 16 times>}}}}
(gdb) f
#1 0x000000000040296c in st_usleep (usecs=1000000) at sync.c:131
131 _ST_SWITCH_CONTEXT(me);
(gdb) bt
#0 _st_iterate_threads () at sched.c:672
#1 0x000000000040296c in st_usleep (usecs=1000000) at sync.c:131
#2 0x000000000040113b in main (argc=1, argv=0x7fffffffe658) at st0.cpp:10
它们的frame地址是0x7ffff7fe9d60和0x7fffffffe560,但是却不能随意使用frame xxx切换,只能在当前backtrace时才能切换。
coroutine backtrace
当我们知道了线程的RBP,那么我们就可以知道整个堆栈,例如我们看当前线程的前一个线程:
// 先计算偏移量,从线程的链表获取到线程的偏移量
(gdb) p (int)(&(((_st_thread_t*)(0))->tlink))
$55 = 72
// 根据当前线程的链表,查找到前一个线程,其中RBP是139928880446560
p *(_st_thread_t*)((char*)_st_this_thread->tlink.prev-72)
__jmpbuf = {4096, 139928880446560, 139928887965408, 8313290, 2266154456, 2274288280, 139928880446528, 5925227}
// 查看RBP开始的两个指针,就是前一个函数的入口和前前函数的RBP,
// 以此类推,可以知道整个调用链条。
(gdb) x /2xa 139928880446560
0x7f43bb367c60: 0x7f43bb367c80 0x4aa9c6 <srs_usleep(long)+24>
(gdb) x/2xa 0x7f43bb367c80
0x7f43bb367c80: 0x7f43bb367cf0 0x5919fd <SrsNgExec::cycle()+381>
(gdb) x/2xa 0x7f43bb367cf0
0x7f43bb367cf0: 0x7f43bb367d20 0x4f9f96 <SrsSTCoroutine::cycle()+142>
(gdb) x/2xa 0x7f43bb367d20
0x7f43bb367d20: 0x7f43bb367d50 0x4fa00b <SrsSTCoroutine::pfn(void*)+43>
(gdb) x/2xa 0x7f43bb367d50
0x7f43bb367d50: 0x7f43bb367d70 0x5a5a92 <_st_thread_main+40>
注意:如果是当前线程,这个RBP可能是不对的,因为jmpbuf保存的是之前的一个位置。当前coroutine直接用bt就可以看到堆栈。
discovery in coredump
第一个coroutine是idle,第二个就是main,也就是SrsServer的coroutine,我们可以看它的调用堆栈:
(gdb) p *(_st_thread_t*)((char*)_st_this_vp.thread_q.next.next-72)
$14 = {state = 5, flags = 5, start = 0x0, arg = 0x0, retval = 0x0, stack = 0x0, links = {next = 0x9d7ab0 <_st_this_vp+16>,
prev = 0x9d7ab0 <_st_this_vp+16>}, wait_links = {next = 0x0, prev = 0x0}, tlink = {next = 0x7f4444ce0e78, prev = 0x7f4444cf2e78},
due = 1603639789498746, left = 0x7f43ba533e30, right = 0x7f43b493be30, heap_index = 44, private_data = 0xc2e320, term = 0x0, context = {{
__jmpbuf = {12646464, 140721635898384, 12653800, 12654984, 0, 0, 140721635898352, 5925227}, __mask_was_saved = 0, __saved_mask = {__val = {
0 <repeats 16 times>}}}}}
(gdb) x/2xa 140721635898384
0x7ffc4f1eb410: 0x7ffc4f1eb430 0x4aa9c6 <srs_usleep(long)+24>
(gdb) x/2xa 0x7ffc4f1eb430
0x7ffc4f1eb430: 0x7ffc4f1eb490 0x4bf2d8 <SrsServer::do_cycle()+408>
这里我们可以看到SrsServer对象了,可以找找它的this指针:
(gdb) x/4xa 0x7ffc4f1eb490
0x7ffc4f1eb490: 0x7ffc4f1eb4f0 0x4bec9b <SrsServer::cycle()+159>
0x7ffc4f1eb4a0: 0xc30518 0xc0f840
// 可以看到第四个指针就是this
(gdb) x/2xa 0xc0f840
0xc0f840: 0x6b1738 <_ZTV9SrsServer+344> 0xc0fad0
// 打印它的内容,是可以对上的:
(gdb) p *(SrsServer*)0xc0f840
$16 = {<ISrsReloadHandler> = {_vptr.ISrsReloadHandler = 0x6b1738 <vtable for SrsServer+344>}, <ISrsSourceHandler> = {
_vptr.ISrsSourceHandler = 0x6b1970 <vtable for SrsServer+912>}, <IConnectionManager> = {
_vptr.IConnectionManager = 0x6b19b0 <vtable for SrsServer+976>}, http_api_mux = 0xc0fad0, http_server = 0xc0fb60, http_heartbeat = 0xc0fcb0,
ingester = 0xc0f780, conn_manager = 0xc0f950, pid_fd = 8, conns = std::vector of length 71, capacity 256 = {0xc30d28, 0xcbbcb8, 0xc76858,
0xd4fb18, 0xcd3938, 0xcd4ae8, 0xd744518, 0x1032a528, 0x495c9e08, 0x47fccbe8, 0x67f14018, 0x70906728, 0x77ce0c38, 0x7b87deb8, 0x76f313b8,
0x7e8148b8, 0x814987c8, 0x8356dff8, 0x82ba8bd0, 0x83489cf8, 0x81a10aa8, 0x863debf8, 0x83b977d8, 0x797c59d8, 0x870b0b28, 0x8b5591e8,
0x8a8f60e8, 0x8c31b748, 0x8d6cf8a8, 0x8c6268a8, 0x8e0d2758, 0x8d951218, 0x8b42aed8, 0x8ae91d88, 0x889791b8, 0x911c76f8, 0x9118c468,
0x93a248a8, 0x917bb4a8, 0x924516a8, 0x8fb1d8c0, 0x9365bf18, 0x94002458, 0x92ad37d8, 0x90c0e068, 0x93817448, 0x921cf848, 0x929e3b78,
0x9039a6b8, 0x92d62518, 0x92bf2ed8, 0x939e5348, 0x91e52718, 0x92b2c198, 0x9470dca8, 0x92ac0b38, 0x94707a88, 0x9456f718, 0x93432b98,
0x93f4f5c8, 0x91bb6f98, 0x8d7b6788, 0x92d33778, 0x92d61cb8, 0x93a17a88, 0x91fb9188, 0x924c3e60, 0x91f63508, 0x8b72e2d8, 0x93242ca8,
0x947c3e18}, listeners = std::vector of length 3, capacity 4 = {0xc2e4b0, 0xc2e6d0, 0xc2e860}, signal_manager = 0xc0f810, handler = 0x0,
signal_reload = false, signal_persistence_config = false, signal_gmc_stop = false, signal_fast_quit = false, signal_gracefully_quit = false,
ppid = 1}
from state-threads.
GDB nn_coroutines
SRS提供脚本nn_coroutines,可以看当前或者coredump中有多少个coroutines:
(gdb) source gdb/srs.py
(gdb) nn_coroutines
this coroutine(&_st_this_thread->tlink) is: 0x7f43ba761e78
next is 0x7f43b92d9e78, total 500
next is 0x7f43b5c37e78, total 1000
next is 0x7f43bfd71e78, total 31500
next is 0x7f43bdad9e78, total 32000
next is 0x7f43bd8f3e78, total 32500
total coroutines: 32717
from state-threads.
GDB show_coroutines
SRS提供脚本show_coroutines,可以看当前或者coredump中每个coroutine的调用函数:
(gdb) source gdb/srs.py
(gdb) show_coroutines
offset=72, _st_this_thread=0xdefa90, pthis-offset=0xdefa90
thread: 0xdefa90, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0xe03e00, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0xe25e20, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe37e30, caller: 0x63b1b4 <st_cond_wait+31>
thread: 0xe4a0f0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe5c2a0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe6e480, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe80580, caller: 0x63b1b4 <st_cond_wait+31>
thread: 0xe925f0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xea6b80, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0xeb8bf0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xee59d0, caller: 0x4c5c68 <srs_cond_timedwait(void*, long)+35>
thread: 0xf22c40, caller: 0x63b1b4 <st_cond_wait+31>
thread: 0xf34c50, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0x7ffff7fc6e40, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xdee8b0, caller: 0x63a009 <_st_thread_main+40>
from state-threads.
Related Issues (20)
- Support MSG_ZEROCOPY for streaming server. HOT 9
- UDP接收中文乱码 HOT 1
- Performance improvement for st_usleep.
- 支持iOS和安卓吗? HOT 4
- Support Multiple Threads for Linux and Darwin only. HOT 4
- Cygwin: ST Support Windows 64bits HOT 2
- Support MIPS for OpenWRT HOT 1
- How to porting ST to other OS/CPU? 如何移植ST到其他系统或CPU?
- Support daemon(fork twice) for Darwin/OSX HOT 1
- Support Loongson CPU arch
- aosp中编译srs-server及libst报错 HOT 2
- 如何使st库支持多线程? HOT 1
- AppleM1: Support Apple Silicon M1(aarch64). HOT 1
- Add utest for threads and codes.
- About How to Support EPOLLET Issue in ST
- Plan: Migrate to C++98/MIT. 计划迁移到C++98/MIT.
- Support backtrace and backtrace_symbols
- 如果协程释放将内存也同时释放,为什么会崩溃 HOT 2
- 增加可以将协程内存释放的功能 HOT 1
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from state-threads.