Opened 3 months ago

#18991 new bug

Deadlock on network notifications unload during boot

Reported by: waddlesplash Owned by: axeld
Priority: normal Milestone: Unscheduled
Component: Network & Internet/Stack Version: R1/beta5
Keywords: Cc:
Blocked By: Blocking:
Platform: All

Description

A boot hung at the rocket icon of the boot splash for me today. Poking around in KDL, I found that a process started by launch_daemon was hung on team deletion:

kdebug> bt 75
stack trace for thread 75 "uname"
    kernel stack: 0xffffffff81da8000 to 0xffffffff81dad000
      user stack: 0x00007f4bf0850000 to 0x00007f4bf1850000
frame                       caller             <image>:function + offset
 0 ffffffff81dac4c0 (+  80) ffffffff80275942   <kernel_x86_64> arch_thread_context_switch + 0x170
 1 ffffffff81dac4f0 (+  48) ffffffff80179c68   <kernel_x86_64> switch_thread(BKernel::Thread*, BKernel::Thread*) + 0xbd
 2 ffffffff81dac5b0 (+ 192) ffffffff8017a273   <kernel_x86_64> reschedule(int) + 0x5fc
 3 ffffffff81dac5e0 (+  48) ffffffff8017a306   <kernel_x86_64> scheduler_reschedule + 0x79
 4 ffffffff81dac600 (+  32) ffffffff80166dd8   <kernel_x86_64> thread_block_locked(BKernel::Thread*) + 0x51
 5 ffffffff81dac630 (+  48) ffffffff80166e1c   <kernel_x86_64> thread_block + 0x38
 6 ffffffff81dac6a0 (+ 112) ffffffff80175875   <kernel_x86_64> _mutex_lock + 0x193
 7 ffffffff81dac6c0 (+  32) ffffffff80173d2a   <kernel_x86_64> mutex_lock + 0x1d
 8 ffffffff81dac6f0 (+  48) ffffffff80173f68   <kernel_x86_64> recursive_lock_lock + 0x75
 9 ffffffff81dac710 (+  32) ffffffff80181fa2   <kernel_x86_64> BPrivate::RecursiveLockLocking::Lock(recursive_lock*) + 0x1c
10 ffffffff81dac730 (+  32) ffffffff8018229b   <kernel_x86_64> BPrivate::AutoLocker<recursive_lock, BPrivate::RecursiveLockLocking>::Lock() + 0x41
11 ffffffff81dac760 (+  48) ffffffff80182117   <kernel_x86_64> BPrivate::AutoLocker<recursive_lock, BPrivate::RecursiveLockLocking>::AutoLocker(recursive_lock&, bool, bool) + 0x65
12 ffffffff81dac7e0 (+ 128) ffffffff8017180d   <kernel_x86_64> DefaultUserNotificationService::EventOccurred(NotificationService&, BPrivate::KMessage const*) + 0x7d
13 ffffffff81dac860 (+ 128) ffffffff80170e7e   <kernel_x86_64> DefaultNotificationService::NotifyLocked(BPrivate::KMessage const&, unsigned int) + 0x7a
14 ffffffff81dac8a0 (+  64) ffffffff801820a3   <kernel_x86_64> DefaultNotificationService::Notify(BPrivate::KMessage const&, unsigned int) + 0x47
15 ffffffff81dac990 (+ 240) ffffffff80157b41   <kernel_x86_64> _GLOBAL__N_1::TeamNotificationService::Notify(unsigned int, BKernel::Team*) + 0xc9
16 ffffffff81daca00 (+ 112) ffffffff8015de06   <kernel_x86_64> team_delete_team + 0x16a
17 ffffffff81dacd20 (+ 800) ffffffff80165789   <kernel_x86_64> thread_exit + 0x803

It was waiting for a network mutex:

kdebug> mutex 0xffffffff81bbc438
mutex 0xffffffff81bbc438:
  name:            network
  flags:           0x0
  holder:          56
  waiting threads: 75

The holder was the launch daemon itself, in "get_module":

kdebug> bt 56
stack trace for thread 56 "launch_daemon"
    kernel stack: 0xffffffff81d8b000 to 0xffffffff81d90000
      user stack: 0x00007f5a775d0000 to 0x00007f5a785d0000
frame                       caller             <image>:function + offset
 0 ffffffff81d8f960 (+  80) ffffffff80275942   <kernel_x86_64> arch_thread_context_switch + 0x170
 1 ffffffff81d8f990 (+  48) ffffffff80179c68   <kernel_x86_64> switch_thread(BKernel::Thread*, BKernel::Thread*) + 0xbd
 2 ffffffff81d8fa50 (+ 192) ffffffff8017a273   <kernel_x86_64> reschedule(int) + 0x5fc
 3 ffffffff81d8fa80 (+  48) ffffffff8017a306   <kernel_x86_64> scheduler_reschedule + 0x79
 4 ffffffff81d8faa0 (+  32) ffffffff80166dd8   <kernel_x86_64> thread_block_locked(BKernel::Thread*) + 0x51
 5 ffffffff81d8fad0 (+  48) ffffffff80166e1c   <kernel_x86_64> thread_block + 0x38
 6 ffffffff81d8fb40 (+ 112) ffffffff80175875   <kernel_x86_64> _mutex_lock + 0x193
 7 ffffffff81d8fb60 (+  32) ffffffff80173d2a   <kernel_x86_64> mutex_lock + 0x1d
 8 ffffffff81d8fb90 (+  48) ffffffff80173f68   <kernel_x86_64> recursive_lock_lock + 0x75
 9 ffffffff81d8fbb0 (+  32) ffffffff80181fa2   <kernel_x86_64> BPrivate::RecursiveLockLocking::Lock(recursive_lock*) + 0x1c
10 ffffffff81d8fbd0 (+  32) ffffffff8018229b   <kernel_x86_64> BPrivate::AutoLocker<recursive_lock, BPrivate::RecursiveLockLocking>::Lock() + 0x41
11 ffffffff81d8fc00 (+  48) ffffffff80182117   <kernel_x86_64> BPrivate::AutoLocker<recursive_lock, BPrivate::RecursiveLockLocking>::AutoLocker(recursive_lock&, bool, bool) + 0x65
12 ffffffff81d8fc60 (+  96) ffffffff80142a31   <kernel_x86_64> get_module + 0x49
13 ffffffff81d8fc80 (+  32) ffffffff81bbb8e8   </boot/system/add-ons/kernel/network/notifications> NetNotificationService::FirstAdded[clone .localalias] () + 0x18
14 ffffffff81d8fce0 (+  96) ffffffff8017194f   <kernel_x86_64> DefaultUserNotificationService::_AddListener(unsigned int, NotificationListener&) + 0xa5
15 ffffffff81d8fd80 (+ 160) ffffffff8017174c   <kernel_x86_64> DefaultUserNotificationService::UpdateUserListener(unsigned int, int, unsigned int) + 0x118
16 ffffffff81d8fda0 (+  32) ffffffff81bbba3b   </boot/system/add-ons/kernel/network/notifications> net_notifications_control(char const*, unsigned int, void*, unsigned long) + 0x5b
17 ffffffff81d8ff20 (+ 384) ffffffff80153588   <kernel_x86_64> _user_generic_syscall(char const*, unsigned int, void*, unsigned long) + 0x1ec
18 ffffffff81d8ff30 (+  16) ffffffff80267b5f   <kernel_x86_64> x86_64_syscall_entry + 0xfb
user iframe at 0xffffffff81d8ff30 (end = 0xffffffff81d8fff8)
 rax 0x1                   rbx 0x7f5a785cf370        rcx 0x120a5ff622c
 rdx 0x7f5a785cf334        rsi 0x1                   rdi 0x12ab98939ae
 rbp 0x7f5a785cf360         r8 0x6                    r9 0x4
 r10 0xc                   r11 0x246                 r12 0xff
 r13 0x12ab98939ae         r14 0x7f5a785cf334        r15 0x0
 rip 0x120a5ff622c         rsp 0x7f5a785cf328     rflags 0x246
 vector: 0x63, error code: 0x0
19 00007f5a785cf360 (+   0) 00000120a5ff622c   <libroot.so> _kern_generic_syscall + 0x0c
20 00007f5a785cf3a0 (+  64) 0000012ab9892767   <libbnetapi.so> start_watching_network(unsigned int, BHandler const*, BLooper const*) + 0x27
21 00007f5a785cf3d0 (+  48) 00000067fa14021b   <_APP_> NetworkWatcher::NetworkWatcher() + 0x8b

That thread (56) was, of course, itself waiting for the modules lock ("modules rlock"):

kdebug> mutex 0xffffffff802f7060
mutex 0xffffffff802f7060:
  name:            modules rlock
  flags:           0x0
  holder:          46
  waiting threads: 56 49

The modules lock was held by net_server (thread 46), which was trying to delete the network stack and, inside unregister_generic_syscall, was waiting for the generic_syscall to no longer be in use. But of course that can never happen, because the thread executing the generic_syscall (thread 56, launch_daemon) is itself waiting for the modules mutex.

kdebug> bt 46
stack trace for thread 46 "net_server"
    kernel stack: 0xffffffff81c84000 to 0xffffffff81c89000
      user stack: 0x00007fcd939e0000 to 0x00007fcd949e0000
frame                       caller             <image>:function + offset
 0 ffffffff81c88a70 (+  80) ffffffff80275942   <kernel_x86_64> arch_thread_context_switch + 0x170
 1 ffffffff81c88aa0 (+  48) ffffffff80179c68   <kernel_x86_64> switch_thread(BKernel::Thread*, BKernel::Thread*) + 0xbd
 2 ffffffff81c88b60 (+ 192) ffffffff8017a273   <kernel_x86_64> reschedule(int) + 0x5fc
 3 ffffffff81c88b90 (+  48) ffffffff8017a306   <kernel_x86_64> scheduler_reschedule + 0x79
 4 ffffffff81c88bb0 (+  32) ffffffff80166dd8   <kernel_x86_64> thread_block_locked(BKernel::Thread*) + 0x51
 5 ffffffff81c88be0 (+  48) ffffffff80166e1c   <kernel_x86_64> thread_block + 0x38
 6 ffffffff81c88c50 (+ 112) ffffffff8012d52e   <kernel_x86_64> ConditionVariableEntry::Wait(unsigned int, long) + 0x158
 7 ffffffff81c88cd0 (+ 128) ffffffff80157615   <kernel_x86_64> unregister_generic_syscall + 0xb7
 8 ffffffff81c88cf0 (+  32) ffffffff81bbb930   </boot/system/add-ons/kernel/network/notifications> notifications_std_ops(int, ...) + 0x30
 9 ffffffff81c88d20 (+  48) ffffffff8013f7f1   <kernel_x86_64> uninit_module(module*) + 0xd8
10 ffffffff81c88d70 (+  80) ffffffff80142d1c   <kernel_x86_64> put_module + 0x10d
11 ffffffff81c88da0 (+  48) ffffffff820ef83c   </boot/system/add-ons/kernel/network/stack> uninit_stack() + 0x1c
12 ffffffff81c88db0 (+  16) ffffffff820efd58   </boot/system/add-ons/kernel/network/stack> stack_interface_std_ops(int, ...) + 0x18
13 ffffffff81c88de0 (+  48) ffffffff8013f7f1   <kernel_x86_64> uninit_module(module*) + 0xd8
14 ffffffff81c88e30 (+  80) ffffffff80142d1c   <kernel_x86_64> put_module + 0x10d
15 ffffffff81c88e60 (+  48) ffffffff801fa07b   <kernel_x86_64> put_stack_interface_module() + 0x7c
16 ffffffff81c88e80 (+  32) ffffffff801fa7e7   <kernel_x86_64> socket_free(file_descriptor*) + 0x29
17 ffffffff81c88eb0 (+  48) ffffffff801effce   <kernel_x86_64> put_fd + 0x59
18 ffffffff81c88ee0 (+  48) ffffffff801f0163   <kernel_x86_64> close_fd_index + 0x51
19 ffffffff81c88f00 (+  32) ffffffff801f1906   <kernel_x86_64> common_close(int, bool) + 0x2b
20 ffffffff81c88f20 (+  32) ffffffff801f1ec4   <kernel_x86_64> _user_close + 0x1a
21 ffffffff81c88f30 (+  16) ffffffff80267b5f   <kernel_x86_64> x86_64_syscall_entry + 0xfb
user iframe at 0xffffffff81c88f30 (end = 0xffffffff81c88ff8)
 rax 0x9e                  rbx 0x7fcd949dec80        rcx 0x13f055cfbf9
 rdx 0x0                   rsi 0x2                   rdi 0x3
 rbp 0x7fcd949de710         r8 0x0                    r9 0x4
 r10 0x0                   r11 0x206                 r12 0x7fcd949de970
 r13 0x0                   r14 0x7fcd949dec08        r15 0x7fcd949dec04
 rip 0x13f055cfbf9         rsp 0x7fcd949de6f8     rflags 0x206
 vector: 0x63, error code: 0x0
22 00007fcd949de710 (+   0) 0000013f055cfbf9   <libroot.so> _kern_close + 0x09
23 00007fcd949de720 (+  16) 000001b8a9f55fef   <_APP_> NetServer::_IsValidFamily(unsigned int) + 0x1f
24 00007fcd949de960 (+ 576) 000001b8a9f58d21   <_APP_> NetServer::_BringUpInterfaces() + 0x21
25 00007fcd949de9a0 (+  64) 000001b8a9f58fa0   <_APP_> NetServer::ReadyToRun() + 0x40
26 00007fcd949debf0 (+ 592) 000000b996027187   <libbe.so> BApplication::DispatchMessage(BMessage*, BHandler*) + 0x467
27 00007fcd949dec40 (+  80) 000000b99602efc4   <libbe.so> BLooper::task_looper() + 0x294
28 00007fcd949dec60 (+  32) 000000b996023f11   <libbe.so> BApplication::Run() + 0x21
29 00007fcd949defb0 (+ 848) 000001b8a9f55b82   <_APP_> main + 0x42
30 00007fcd949defe0 (+  48) 000001b8a9f55cee   <_APP_> _start + 0x3e
31 00007fcd949df020 (+  64) 000001eb0d30d8bf   </boot/system/runtime_loader@0x000001eb0d2f7000> <unknown> + 0x168bf
32 0000000000000000 (+   0) 00007fa441afa2b0   <commpage> commpage_thread_exit + 0x00

Change History (0)

Note: See TracTickets for help on using tickets.