Open7

WasmLinux: USB/IPの初期化を遅延させたい

okuokuokuoku

要はこの辺のシンボルの初期化を別に分けておきたい。

extern initcall_t __initcall__kmod_usbip_core__431_847_usbip_core_init6;
extern initcall_t __initcall__kmod_vhci_hcd__237_1574_vhci_hcd_init6;

たぶん問題を起こしているのは vhci_hcd の方なので、一旦これだけ初期化リストから外して問題が解消するか様子を見てみることにする。

okuokuokuoku

でも初期化するとハングする

diff --git a/_hostwasm/hostwasm_main.c b/_hostwasm/hostwasm_main.c
index 371bc42b567e..e35b09230f8a 100644
--- a/_hostwasm/hostwasm_main.c
+++ b/_hostwasm/hostwasm_main.c
@@ -131,6 +131,9 @@ syscall(uint32_t no, uint32_t nargs, uint32_t* in){
     return lkl_syscall(no, nargs, (long*)in);
 }

+typedef int (*initcall_t)(void);
+extern initcall_t __initcall__kmod_vhci_hcd__237_1574_vhci_hcd_init6;
+
 void __attribute__((export_name ("init")))
 init(void){
     copy_init_thread_info();
@@ -138,4 +141,5 @@ init(void){
     host_lkl_inittbl();
     lkl_init(lklhost_getops());
     lkl_start_kernel("mem=64M"); // FIXME
+    __initcall__kmod_vhci_hcd__237_1574_vhci_hcd_init6();
 }

うーん。。

引数は正常に見えるのでjiffies換算の問題では無さそうだ。

w2c_kernel_usb_start_wait_urb(instance=0x0000000002963200, var_p0=942984960, var_p1=5000, var_p2=2329228) at lin.c:1669699
w2c_kernel_wait_for_completion_timeout(instance=0x0000000002963200, var_p0=2329200, var_p1=500) at lin.c:1470857

で、長時間待っても、

w2c_kernel_schedule_timeout(instance=0x0000000002963200, var_p0=499) at lin.c:1540749

var_p0 が減っていないのが微妙に気になる。

okuokuokuoku

_mod_timer してもタイマを設定しに来ない

jiffiesはロールオーバー直前で起動するようになっているので負値に見えるのはオッケーなのかな。

https://github.com/lkl/linux/blob/3023e6f25fbf6d5f95b4e7ebd011fa688434ce5f/kernel/time/timer.c#L1013

というか今気付いたけど、Windowsだとタイマの動作自体が狂っているのに同じようにVHCIの初期化で止まる。つまり、初手からタイマが狂ってる可能性が出てきたな。。

okuokuokuoku

なんか警告が出ることがある

[  130.201499] ------------[ cut here ]------------

Done: 749
Wait: 1760700
[  130.212022] WARNING: CPU: 0 PID: 23 at kernel/sched/core.c:5065 0xdeadcafe

Spurious wakeup!: 1760700 again
Wait: 1760700
[  130.223346] corrupted preempt_count: idle_host_task/23/0x10000

Fire: 749
Done: 749
[  130.236573] CPU: 0 PID: 23 Comm: idle_host_task Not tainted 6.1.0+ #

https://github.com/lkl/linux/blob/3023e6f25fbf6d5f95b4e7ebd011fa688434ce5f/kernel/sched/core.c#L5052-L5066

(Spurious wakeup! はランタイム側の警告 -- どうもWindowsのdebug CRTではcondition variableのspurious wakeupが割と発生するようだ)

再現性が無いので完全に謎だ。。

okuokuokuoku

なぜかメインスレッドがidleスレッド扱いされている

USB/IPの初期化も含め、起動時には2回 schedule_timeout が実行される。1回目はkswapdでそれは問題なし。2回目がUSB/IPでハングになる。このとき、1回目の schedule_timeout でデバッグ停止してから再開すると2回目で、

Oneshot timer: 8 5965500
[   15.022940] bad: scheduling from the idle thread!

Rearm: 5965500
Wait: 5965500
[   15.032058] ------------[ cut here ]------------

[   15.032103] WARNING: CPU: 0 PID: 0 at kernel/sched/core.c:6623 0xdeadcafe

Fire: 749
[   15.032374] CPU: 0 PID: 0 Comm: swapper Not tainted 6.1.0+ #

Done: 749
Oneshot timer: 8 8707500
Wait: 8707500
Fire: 749
Done: 749

のように警告される。これはかなり激烈におかしい。TLSが間違ってるとか。。?

>	runner.exe!w2c_kernel_dequeue_task_idle(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x00163c80, unsigned int var_p1=0x00161d80, unsigned int var_p2=0x00000009) Line 1631402	C	Symbols loaded.
 	runner.exe!w2c_kernel_0x5F_schedule(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x00000000) Line 1427120	C	Symbols loaded.
 	runner.exe!w2c_kernel_schedule(w2c_kernel * instance=0x00007ff780f02e80) Line 1427533	C	Symbols loaded.
 	runner.exe!w2c_kernel_schedule_timeout(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x000001f4) Line 1542992	C	Symbols loaded.
 	runner.exe!w2c_kernel_wait_for_common(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x00239bc0, unsigned int var_p1=0x000001f4, unsigned int var_p2=0x00000002) Line 1471294	C	Symbols loaded.
 	runner.exe!w2c_kernel_wait_for_completion_timeout(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x00239bc0, unsigned int var_p1=0x000001f4) Line 1471376	C	Symbols loaded.
 	runner.exe!w2c_kernel_usb_start_wait_urb(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x3834cf00, unsigned int var_p1=0x00001388, unsigned int var_p2=0x00239bdc) Line 1673743	C	Symbols loaded.
 	runner.exe!w2c_kernel_usb_control_msg(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383e6c00, unsigned int var_p1=0x80000180, unsigned int var_p2=0x00000006, unsigned int var_p3=0x00000080, unsigned int var_p4=0x00000100, unsigned int var_p5=0x3834cf00, unsigned int var_p6=0x383f11e0, unsigned int var_p7=0x00000012, unsigned int var_p8=0x00001388) Line 1673648	C	Symbols loaded.
 	runner.exe!w2c_kernel_usb_get_descriptor(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383e6c00, unsigned int var_p1=0x00000001, unsigned int var_p2=0x00000000, unsigned int var_p3=0x383f11e0, unsigned int var_p4=0x00000012) Line 1675090	C	Symbols loaded.
 	runner.exe!w2c_kernel_usb_get_device_descriptor(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383e6c00, unsigned int var_p1=0x00000012) Line 1675822	C	Symbols loaded.
 	runner.exe!w2c_kernel_register_root_hub(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383df800) Line 1698651	C	Symbols loaded.
 	runner.exe!w2c_kernel_usb_add_hcd(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383df800, unsigned int var_p1=0x00000000, unsigned int var_p2=0x00000000) Line 1698211	C	Symbols loaded.
 	runner.exe!w2c_kernel_vhci_hcd_probe(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba00) Line 1703482	C	Symbols loaded.
 	runner.exe!w2c_kernel_platform_probe(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba10) Line 522221	C	Symbols loaded.
 	runner.exe!w2c_kernel_really_probe(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba10, unsigned int var_p1=0x001cdb50) Line 463568	C	Symbols loaded.
 	runner.exe!w2c_kernel_driver_probe_device(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x001cdb50, unsigned int var_p1=0x383dba10) Line 464100	C	Symbols loaded.
 	runner.exe!w2c_kernel_0x5F_device_attach_driver(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x001cdb50, unsigned int var_p1=0x00239ce8) Line 463247	C	Symbols loaded.
 	runner.exe!w2c_kernel_bus_for_each_drv(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x00000000, unsigned int var_p1=0x001cdb50, unsigned int var_p2=0x00239ce8, unsigned int var_p3=0x0000025a) Line 1139857	C	Symbols loaded.
 	runner.exe!w2c_kernel_0x5F_device_attach(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba10, unsigned int var_p1=0x00000001) Line 463093	C	Symbols loaded.
 	runner.exe!w2c_kernel_device_initial_probe(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba10) Line 463317	C	Symbols loaded.
 	runner.exe!w2c_kernel_bus_probe_device(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba10) Line 1140011	C	Symbols loaded.
 	runner.exe!w2c_kernel_device_add(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba10) Line 1087993	C	Symbols loaded.
 	runner.exe!w2c_kernel_platform_device_add(w2c_kernel * instance=0x00007ff780f02e80, unsigned int var_p0=0x383dba00) Line 521734	C	Symbols loaded.
 	runner.exe!w2c_kernel_vhci_hcd_init(w2c_kernel * instance=0x00007ff780f02e80) Line 1703297	C	Symbols loaded.
 	runner.exe!w2c_kernel_init_0(w2c_kernel * instance=0x00007ff780f02e80) Line 1707759	C	Symbols loaded.
 	runner.exe!w2c_kernel_init(w2c_kernel * instance=0x00007ff780f02e80) Line 191181	C	Symbols loaded.
 	runner.exe!main(int ac=0x00000001, char * * av=0x0000027425166050) Line 895	C++	Symbols loaded.
 	runner.exe!invoke_main() Line 79	C++	Symbols loaded.
 	runner.exe!__scrt_common_main_seh() Line 288	C++	Symbols loaded.
 	runner.exe!__scrt_common_main() Line 331	C++	Symbols loaded.
 	runner.exe!mainCRTStartup(void * __formal=0x000000de62edc000) Line 17	C++	Symbols loaded.
 	kernel32.dll!BaseThreadInitThunk()	Unknown	Symbols loaded.
 	ntdll.dll!RtlUserThreadStart()	Unknown	Symbols loaded.
okuokuokuoku

syscallコンテキストなら大丈夫

... 大丈夫なのか。。? swapon のsyscall番号を間借りして、これを呼んだら続きの初期化がされるように修正したらうまくいった。

要するに host0 の割り当てが遅すぎるのが問題だな。初期スレッドを作成したタイミングでhost0はアサインされていないといけない。はず。とりあえずワークアラウンドとしては↓で。

diff --git a/_hostwasm/hostwasm_main.c b/_hostwasm/hostwasm_main.c
index 371bc42b567e..4266d5fb1446 100644
--- a/_hostwasm/hostwasm_main.c
+++ b/_hostwasm/hostwasm_main.c
@@ -131,6 +131,9 @@ syscall(uint32_t no, uint32_t nargs, uint32_t* in){
     return lkl_syscall(no, nargs, (long*)in);
 }

+typedef int (*initcall_t)(void);
+extern initcall_t __initcall__kmod_vhci_hcd__237_1574_vhci_hcd_init6;
+
 void __attribute__((export_name ("init")))
 init(void){
     copy_init_thread_info();
@@ -138,4 +141,5 @@ init(void){
     host_lkl_inittbl();
     lkl_init(lklhost_getops());
     lkl_start_kernel("mem=64M"); // FIXME
+    //__initcall__kmod_vhci_hcd__237_1574_vhci_hcd_init6();
 }
diff --git a/arch/lkl/kernel/syscalls.c b/arch/lkl/kernel/syscalls.c
index c53e9c005c7c..ac73ba48ac6c 100644
--- a/arch/lkl/kernel/syscalls.c
+++ b/arch/lkl/kernel/syscalls.c
@@ -28,6 +28,18 @@ typedef long (*syscall_handler3_t)(long arg1, long arg2, long arg3);
 typedef long (*syscall_handler4_t)(long arg1, long arg2, long arg3, long arg4);
 typedef long (*syscall_handler5_t)(long arg1, long arg2, long arg3, long arg4, long arg5);
 typedef long (*syscall_handler6_t)(long arg1, long arg2, long arg3, long arg4, long arg5, long arg6);
+
+
+/* FIXME: stub */
+typedef int (*initcall_t)(void);
+extern initcall_t __initcall__kmod_vhci_hcd__237_1574_vhci_hcd_init6;
+
+static long
+callrestinit(void){
+    __initcall__kmod_vhci_hcd__237_1574_vhci_hcd_init6();
+    return 0;
+}
+
 #endif

 #undef __SYSCALL
@@ -40,6 +52,9 @@ syscall_handler_t syscall_table[__NR_syscalls] = {
 #if __BITS_PER_LONG == 32
 #include <asm/unistd_32.h>
 #endif
+#ifdef __wasm__
+        [224 /* swapon */] = (syscall_handler_t)callrestinit
+#endif
 };

 #ifndef __wasm__