龙芯:https://www.loongson.cn/
适配芯片(CPU):
适配系统(OS):
指令集和汇编:
已经支持的龙芯CPU列表:
先确定龙芯的宏定义:
g++ -dM -E - </dev/null |grep -i loong
可以看到应该是`__loongarch__`,或者`__loongarch64`。
Note: `__loongarch64`和`__loongarch32`都属于`__loongarch__`,服务器都是64位,所以我们只需要适配`__loongarch64`就可以了。
寄存器的具体用法如下:
- r0, zero, Constant zero
- r1, ra, Return address,返回地址。
- r2, tp, TLS(Thread Local Storage),和TLS相关。
- r3, sp, Stack pointer,堆栈寄存器。
- r4-r11, a0-a7, Argument registers,参数寄存器。
- r4-r5, v0-v1, Return value,返回值。
- r12-r20, t0-t8, Temp registers,临时寄存器。
- r21, x, Reserved, 保留寄存器。
- r22, fp, Frame pointer,Frame寄存器。
- r23-r31, s0-s8, Subroutine register variable,子函数寄存器。
主要保存的寄存器如下(此外返回地址r1/ra也需要保存,对应后面布局中的JB_RA):
- r3, sp, Stack pointer,堆栈寄存器。
- r22, fp, Frame pointer,Frame寄存器。
- r23-r31, s0-s8, Subroutine register variable,子函数寄存器。
修改并编译porting.c,调试程序,设置显示汇编:
(gdb) set disassemble-next-line on
观察下函数调用指令,调试`foo_return_zero`:
47 int r0 = foo_return_zero();
=> 0x00000001200008bc <main+196>: 00 78 00 54 bl 120(0x78) # 0x120000934 <foo_return_zero>
0x00000001200008c0 <main+200>: 8c 00 15 00 move $r12,$r4
0x00000001200008c4 <main+204>: cc b2 bf 29 st.w $r12,$r22,-20(0xfec)
foo_return_zero () at porting.c:59
59 {
=> 0x0000000120000934 <foo_return_zero+0>: 63 c0 ff 02 addi.d $r3,$r3,-16(0xff0)
0x0000000120000938 <foo_return_zero+4>: 76 20 c0 29 st.d $r22,$r3,8(0x8)
0x000000012000093c <foo_return_zero+8>: 76 40 c0 02 addi.d $r22,$r3,16(0x10)
60 return 0;
=> 0x0000000120000940 <foo_return_zero+12>: 0c 00 15 00 move $r12,$r0
61 }
=> 0x0000000120000944 <foo_return_zero+16>: 84 01 15 00 move $r4,$r12
0x0000000120000948 <foo_return_zero+20>: 76 20 c0 28 ld.d $r22,$r3,8(0x8)
0x000000012000094c <foo_return_zero+24>: 63 40 c0 02 addi.d $r3,$r3,16(0x10)
0x0000000120000950 <foo_return_zero+28>: 20 00 00 4c jirl $r0,$r1,0
- bl:调用函数的指令。
- r4:保存返回值的寄存器。
- r3:作为sp寄存器,进入函数和返回时对r3的操作相当于push和pop。
看下带参数的函数`foo_return_one_arg1`:
49 int r2 = foo_return_one_arg1(r1);
=> 0x00000001200008d4 <main+220>: cc ea ff 24 ldptr.w $r12,$r22,-24(0xffe8)
0x00000001200008d8 <main+224>: 84 01 15 00 move $r4,$r12
0x00000001200008dc <main+228>: 00 98 00 54 bl 152(0x98) # 0x120000974 <foo_return_one_arg1>
0x00000001200008e0 <main+232>: 8c 00 15 00 move $r12,$r4
0x00000001200008e4 <main+236>: cc 92 bf 29 st.w $r12,$r22,-28(0xfe4)
foo_return_one_arg1 (r0=1) at porting.c:69
69 {
=> 0x0000000120000974 <foo_return_one_arg1+0>: 63 80 ff 02 addi.d $r3,$r3,-32(0xfe0)
0x0000000120000978 <foo_return_one_arg1+4>: 76 60 c0 29 st.d $r22,$r3,24(0x18)
0x000000012000097c <foo_return_one_arg1+8>: 76 80 c0 02 addi.d $r22,$r3,32(0x20)
0x0000000120000980 <foo_return_one_arg1+12>: 8c 00 15 00 move $r12,$r4
0x0000000120000984 <foo_return_one_arg1+16>: 8c 81 40 00 slli.w $r12,$r12,0x0
0x0000000120000988 <foo_return_one_arg1+20>: cc b2 bf 29 st.w $r12,$r22,-20(0xfec)
70 return r0 + 2;
=> 0x000000012000098c <foo_return_one_arg1+24>: cc b2 bf 28 ld.w $r12,$r22,-20(0xfec)
0x0000000120000990 <foo_return_one_arg1+28>: 8c 09 80 02 addi.w $r12,$r12,2(0x2)
71 }
=> 0x0000000120000994 <foo_return_one_arg1+32>: 84 01 15 00 move $r4,$r12
0x0000000120000998 <foo_return_one_arg1+36>: 76 60 c0 28 ld.d $r22,$r3,24(0x18)
0x000000012000099c <foo_return_one_arg1+40>: 63 80 c0 02 addi.d $r3,$r3,32(0x20)
0x00000001200009a0 <foo_return_one_arg1+44>: 20 00 00 4c jirl $r0,$r1,0
调试`setjmp`函数:
#0 print_jmpbuf () at porting.c:141
141 int r0 = setjmp(ctx);
=> 0x00000001200009ec <print_jmpbuf+72>: cc c2 f9 02 addi.d $r12,$r22,-400(0xe70)
0x00000001200009f0 <print_jmpbuf+76>: 84 01 15 00 move $r4,$r12
0x00000001200009f4 <print_jmpbuf+80>: ff 7f fc 57 bl -900(0xffffc7c) # 0x120000670 <_setjmp@plt>
0x00000001200009f8 <print_jmpbuf+84>: 8c 00 15 00 move $r12,$r4
0x00000001200009fc <print_jmpbuf+88>: cc b2 be 29 st.w $r12,$r22,-84(0xfac)
(gdb) disassemble
Dump of assembler code for function _setjmp@plt:
=> 0x0000000120000670 <+0>: pcaddu12i $r15,8(0x8)
0x0000000120000674 <+4>: ld.d $r15,$r15,-1624(0x9a8)
0x0000000120000678 <+8>: pcaddu12i $r13,0
0x000000012000067c <+12>: jirl $r0,$r15,0
(gdb) disassemble
Dump of assembler code for function __sigsetjmp:
=> 0x000000fff7e943b8 <+0>: st.d $r1,$r4,0
0x000000fff7e943bc <+4>: st.d $r3,$r4,8(0x8)
0x000000fff7e943c0 <+8>: st.d $r21,$r4,16(0x10)
0x000000fff7e943c4 <+12>: st.d $r22,$r4,24(0x18)
0x000000fff7e943c8 <+16>: st.d $r23,$r4,32(0x20)
0x000000fff7e943cc <+20>: st.d $r24,$r4,40(0x28)
0x000000fff7e943d0 <+24>: st.d $r25,$r4,48(0x30)
0x000000fff7e943d4 <+28>: st.d $r26,$r4,56(0x38)
0x000000fff7e943d8 <+32>: st.d $r27,$r4,64(0x40)
0x000000fff7e943dc <+36>: st.d $r28,$r4,72(0x48)
0x000000fff7e943e0 <+40>: st.d $r29,$r4,80(0x50)
0x000000fff7e943e4 <+44>: st.d $r30,$r4,88(0x58)
0x000000fff7e943e8 <+48>: st.d $r31,$r4,96(0x60)
0x000000fff7e943ec <+52>: fst.d $f24,$r4,104(0x68)
0x000000fff7e943f0 <+56>: fst.d $f25,$r4,112(0x70)
0x000000fff7e943f4 <+60>: fst.d $f26,$r4,120(0x78)
0x000000fff7e943f8 <+64>: fst.d $f27,$r4,128(0x80)
0x000000fff7e943fc <+68>: fst.d $f28,$r4,136(0x88)
0x000000fff7e94400 <+72>: fst.d $f29,$r4,144(0x90)
0x000000fff7e94404 <+76>: fst.d $f30,$r4,152(0x98)
0x000000fff7e94408 <+80>: fst.d $f31,$r4,160(0xa0)
0x000000fff7e9440c <+84>: b 4(0x4) # 0xfff7e94410 <__sigjmp_save>
(gdb) disassemble
Dump of assembler code for function __sigjmp_save:
=> 0x000000fff7e94410 <+0>: addi.d $r3,$r3,-16(0xff0)
0x000000fff7e94414 <+4>: stptr.d $r23,$r3,0
0x000000fff7e94418 <+8>: st.d $r1,$r3,8(0x8)
0x000000fff7e9441c <+12>: move $r23,$r4
0x000000fff7e94420 <+16>: bnez $r5,32(0x20) # 0xfff7e94440 <__sigjmp_save+48>
0x000000fff7e94424 <+20>: ld.d $r1,$r3,8(0x8)
0x000000fff7e94428 <+24>: st.w $r5,$r23,168(0xa8)
0x000000fff7e9442c <+28>: move $r4,$r0
0x000000fff7e94430 <+32>: ldptr.d $r23,$r3,0
0x000000fff7e94434 <+36>: addi.d $r3,$r3,16(0x10)
0x000000fff7e94438 <+40>: jirl $r0,$r1,0
0x000000fff7e9443c <+44>: andi $r0,$r0,0x0
0x000000fff7e94440 <+48>: addi.d $r6,$r4,176(0xb0)
0x000000fff7e94444 <+52>: move $r5,$r0
0x000000fff7e94448 <+56>: move $r4,$r0
0x000000fff7e9444c <+60>: bl 1924(0x784) # 0xfff7e94bd0 <sigprocmask>
0x000000fff7e94450 <+64>: ld.d $r1,$r3,8(0x8)
0x000000fff7e94454 <+68>: sltui $r5,$r4,1(0x1)
0x000000fff7e94458 <+72>: st.w $r5,$r23,168(0xa8)
0x000000fff7e9445c <+76>: move $r4,$r0
0x000000fff7e94460 <+80>: ldptr.d $r23,$r3,0
0x000000fff7e94464 <+84>: addi.d $r3,$r3,16(0x10)
0x000000fff7e94468 <+88>: jirl $r0,$r1,0
(gdb) p &ctx
$25 = (jmp_buf *) 0xffffff3110
(gdb) p/x $r12
$27 = 0xffffff3110
调试`longjmp`函数:
(gdb) disassemble
Dump of assembler code for function __longjmp:
=> 0x000000fff7e944c0 <+0>: ld.d $r1,$r4,0
0x000000fff7e944c4 <+4>: ld.d $r3,$r4,8(0x8)
0x000000fff7e944c8 <+8>: ld.d $r21,$r4,16(0x10)
0x000000fff7e944cc <+12>: ld.d $r22,$r4,24(0x18)
0x000000fff7e944d0 <+16>: ld.d $r23,$r4,32(0x20)
0x000000fff7e944d4 <+20>: ld.d $r24,$r4,40(0x28)
0x000000fff7e944d8 <+24>: ld.d $r25,$r4,48(0x30)
0x000000fff7e944dc <+28>: ld.d $r26,$r4,56(0x38)
0x000000fff7e944e0 <+32>: ld.d $r27,$r4,64(0x40)
0x000000fff7e944e4 <+36>: ld.d $r28,$r4,72(0x48)
0x000000fff7e944e8 <+40>: ld.d $r29,$r4,80(0x50)
0x000000fff7e944ec <+44>: ld.d $r30,$r4,88(0x58)
0x000000fff7e944f0 <+48>: ld.d $r31,$r4,96(0x60)
0x000000fff7e944f4 <+52>: fld.d $f24,$r4,104(0x68)
0x000000fff7e944f8 <+56>: fld.d $f25,$r4,112(0x70)
0x000000fff7e944fc <+60>: fld.d $f26,$r4,120(0x78)
0x000000fff7e94500 <+64>: fld.d $f27,$r4,128(0x80)
0x000000fff7e94504 <+68>: fld.d $f28,$r4,136(0x88)
0x000000fff7e94508 <+72>: fld.d $f29,$r4,144(0x90)
0x000000fff7e9450c <+76>: fld.d $f30,$r4,152(0x98)
0x000000fff7e94510 <+80>: fld.d $f31,$r4,160(0xa0)
0x000000fff7e94514 <+84>: sltui $r4,$r5,1(0x1)
0x000000fff7e94518 <+88>: add.d $r4,$r4,$r5
0x000000fff7e9451c <+92>: jirl $r0,$r1,0
具体寄存器的布局,我们选择的是:
#define JB_SP 0 /* R3, SP, Stack pointer */
#define JB_RA 1 /* R1, RA, Return address */
#define JB_FP 2 /* FP/R22 Frame pointer */
#define JB_S0 3 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S1 4 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S2 5 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S3 6 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S4 7 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S5 8 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S6 9 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S7 10 /* R23-R31, S0-S8, Subroutine register variable */
#define JB_S8 11 /* R23-R31, S0-S8, Subroutine register variable */
我们将SP放在最开始的8字节,固定的位置,主要是为了方便更换SP。
适配完成后,可以用`verify`工具,查看jmpbuf的布局:
[root@host-192-168-100-6 verify]# pwd
/root/git/state-threads/tools/verify
[root@host-192-168-100-6 verify]# ./verify
sp=0xfffbed2b20, ra=0x1200007ac, fp=0xfffbed2cf0, s0=(nil), s1=0x1200009e8, s2=(nil), s3=(nil), s4=0xaab5cb9fc0, s5=0xaab5baad30, s6=0xaab5bae8b0, s7=(nil), s7=(nil)
0x20 0x2b 0xed 0xfb 0xff 0x00 0x00 0x00
0xac 0x07 0x00 0x20 0x01 0x00 0x00 0x00
0xf0 0x2c 0xed 0xfb 0xff 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0xe8 0x09 0x00 0x20 0x01 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0xc0 0x9f 0xcb 0xb5 0xaa 0x00 0x00 0x00
0x30 0xad 0xba 0xb5 0xaa 0x00 0x00 0x00
0xb0 0xe8 0xba 0xb5 0xaa 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
运行helloworld,会创建ST协程不断打印:
[root@host-192-168-100-6 helloworld]# pwd
/root/git/state-threads/tools/helloworld
[root@host-192-168-100-6 helloworld]# ./helloworld
#000, Hello, state-threads world!
#001, Hello, state-threads world!
#002, Hello, state-threads world!
这意味着ST就成功适配了。