Bug#781210: systemd asserts on function cg_is_empty_recursive, crashes
Faidon Liambotis
paravoid at debian.org
Thu Mar 26 02:50:41 GMT 2015
Package: systemd
Version: 215-12
Severity: critical
I've managed to reproducibly crash systemd:
# grep systemd /var/log/syslog | tail -3
Mar 26 01:02:15 curium systemd[1]: Assertion 'path' failed at ../src/shared/cgroup-util.c:913, function cg_is_empty_recursive(). Aborting.
Mar 26 01:02:15 curium systemd[1]: Caught <ABRT>, dumped core as pid 6916.
Mar 26 01:02:15 curium systemd[1]: Freezing execution.
After that, the system remains functioning (i.e. pid1 stays alive and
the kernel does not panic) but systemctl etc. do not respond ("Failed to
list units: Connection timed out") and the system as a whole is pretty
useless until a reboot.
This is hard to trigger as it happens under very specific conditions
plus a race, but I've managed to reproduce it five times already on two
different servers.
The gory details and steps to reproduce are over at #781209, but in
short:
- strongswan-starter ships an init script /etc/init.d/ipsec and a system
unit file named "strongswan.service" but containing Alias=ipsec.service.
- strongswan-starter's postinst is buggy and calls "invoke-rc.d ipsec
start" manually before the systemd unit is fully set up.
- This results into the ipsec daemons actually starting up in an
ipsec.service cgroup, as evidenced by e.g. a systemctl status.
- A subsequent "systemctl restart strongswan" almost always results into
the service becoming "inactive" and the processes under the
ipsec.service cgroup being killed.
Sometimes, though, the service gets into an "inactive (dead)" state
but the processes from the (wrong) cgroup stay up. This possibly
happens because systemd tries to set up a strongswan.service cgroup?
- At that point, the processes are orphaned and lost from systemd's
control and are completely unmanageable by systemctl.
- Killing them by hand (e.g. via kill or "ipsec stop") crashes systemd.
A gdb "bt full" is attached.
Faidon
-------------- next part --------------
(gdb) bt full
#0 0x00007f8fb8b0d79b in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:37
resultvar = 0
pid = <optimized out>
#1 0x00007f8fb8f633d8 in crash.lto_priv.234 (sig=6) at ../src/core/main.c:158
rl = {rlim_cur = 18446744073709551615, rlim_max = 18446744073709551615}
sa = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0}, sa_mask = {__val = {0 <repeats 16 times>}}, sa_flags = 0, sa_restorer = 0x0}
__func__ = "crash"
__PRETTY_FUNCTION__ = "crash"
#2 <signal handler called>
No locals.
#3 0x00007f8fb878a107 in __GI_raise (sig=sig at entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
resultvar = 0
pid = 1
selftid = 1
#4 0x00007f8fb878b4e8 in __GI_abort () at abort.c:89
save_stage = 2
act = {__sigaction_handler = {sa_handler = 0x7ffd56ee0ae4, sa_sigaction = 0x7ffd56ee0ae4}, sa_mask = {__val = {140255286606672, 140255286672192,
140726061894624, 140255287343296, 5990105739957488896, 140255286672192, 140255260941991, 140255260944944, 2, 1, 140255287343248, 913,
140255260573900, 140255263445632, 140255287195268, 140255286635808}}, sa_flags = -1165411504, sa_restorer = 0x7f8fba8a3b40}
sigs = {__val = {32, 0 <repeats 15 times>}}
#5 0x00007f8fb8f9aed2 in log_assert_failed (text=text at entry=0x7f8fb901c736 "path", file=file at entry=0x7f8fb9019ea7 "../src/shared/cgroup-util.c",
line=line at entry=913, func=func at entry=0x7f8fb901aa30 <__PRETTY_FUNCTION__.8851> "cg_is_empty_recursive") at ../src/shared/log.c:709
No locals.
#6 0x00007f8fb8f6cc8f in cg_is_empty_recursive.constprop.53 (path=0x0, ignore_self=ignore_self at entry=true, controller=<synthetic pointer>)
at ../src/shared/cgroup-util.c:913
d = 0x0
fn = 0x7f8fba8a4d90 "\001"
r = <optimized out>
#7 0x00007f8fb8ff21c3 in manager_notify_cgroup_empty (cgroup=<optimized out>, m=0x7f8fba893b50) at ../src/core/cgroup.c:978
u = 0x7f8fba894d30
r = <optimized out>
#8 signal_agent_released (bus=<optimized out>, message=0x7f8fba8a3b40, userdata=0x7f8fba893b50, error=<optimized out>) at ../src/core/dbus.c:90
m = 0x7f8fba893b50
cgroup = 0x7f8fba923684 "/system.slice/ipsec.service"
r = <optimized out>
__PRETTY_FUNCTION__ = "signal_agent_released"
__func__ = "signal_agent_released"
#9 0x00007f8fb9009137 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba90f260, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:299
error_buffer = {name = 0x0, message = 0x0, _need_free = 0}
slot = 0x7f8fba947890
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba90f260
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#10 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba9242c0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba9242c0
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#11 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba94b410, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
found = <optimized out>
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba94b410
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#12 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba924040, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba924040
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#13 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba93d2d0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
found = <optimized out>
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba93d2d0
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#14 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba9237a0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba9237a0
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#15 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba930bb0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
found = <optimized out>
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba930bb0
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#16 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba91eb70, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba91eb70
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#17 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba91e8d0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
found = <optimized out>
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba91e8d0
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#18 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba922ac0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba922ac0
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#19 0x00007f8fb9008c9e in bus_match_run (bus=bus at entry=0x7f8fba8c2970, node=node at entry=0x7f8fba8c29f0, m=m at entry=0x7f8fba8a3b40)
at ../src/libsystemd/sd-bus/bus-match.c:266
test_str = <optimized out>
test_u8 = <optimized out>
r = <optimized out>
m = 0x7f8fba8a3b40
node = 0x7f8fba8c29f0
bus = <optimized out>
__PRETTY_FUNCTION__ = "bus_match_run"
#20 0x00007f8fb8f8eb48 in process_match.lto_priv.252 (bus=bus at entry=0x7f8fba8c2970, m=m at entry=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/sd-bus.c:2261
r = <optimized out>
__PRETTY_FUNCTION__ = "process_match"
#21 0x00007f8fb8f792bb in process_message (m=<optimized out>, bus=0x7f8fba8c2970) at ../src/libsystemd/sd-bus/sd-bus.c:2386
r = 0
#22 process_running (priority=0, ret=0x0, hint_priority=false, bus=0x7f8fba8c2970) at ../src/libsystemd/sd-bus/sd-bus.c:2436
m = <optimized out>
r = <optimized out>
#23 bus_process_internal.constprop.172 (bus=0x7f8fba8c2970, ret=0x0, priority=0, hint_priority=false) at ../src/libsystemd/sd-bus/sd-bus.c:2619
_dont_destroy_bus = 0x7f8fba8c2970
#24 0x00007f8fb8f64061 in sd_bus_process (ret=0x0, bus=<optimized out>) at ../src/libsystemd/sd-bus/sd-bus.c:2638
No locals.
#25 io_callback (s=<optimized out>, fd=<optimized out>, revents=<optimized out>, userdata=<optimized out>) at ../src/libsystemd/sd-bus/sd-bus.c:2897
bus = <optimized out>
__PRETTY_FUNCTION__ = "io_callback"
#26 0x00007f8fb8f66fe0 in source_dispatch (s=0x7f8fba894530) at ../src/libsystemd/sd-event/sd-event.c:2016
__PRETTY_FUNCTION__ = "source_dispatch"
__func__ = "source_dispatch"
#27 0x00007f8fb8f6822f in sd_event_run (e=0x7f8fba893f80, timeout=<optimized out>) at ../src/libsystemd/sd-event/sd-event.c:2314
ev_queue = <optimized out>
ev_queue_max = <optimized out>
p = <optimized out>
r = 0
i = <optimized out>
m = 1
timedout = false
__PRETTY_FUNCTION__ = "sd_event_run"
#28 0x00007f8fb9011211 in manager_loop (m=0x7f8fba893b50) at ../src/core/manager.c:1912
wait_usec = 1
rl = {interval = 1000000, begin = 919722059, burst = 50000, num = 6}
__PRETTY_FUNCTION__ = "manager_loop"
__func__ = "manager_loop"
#29 0x00007f8fb8f5f22b in main (argc=1, argv=<optimized out>) at ../src/core/main.c:1743
m = 0x7f8fba893b50
r = <optimized out>
retval = 1
before_startup = <optimized out>
after_startup = <optimized out>
timespan = "f\336\b`\330qX\034Q\343\061\256\272\343\222|s\302x\356.cx\204\221U_\025J=G\363BE\325\354/cx\204\060cx\204\221x\251?\373\062\000\362?\355s\240sU\241\063S\247\273"
fds = 0x0
reexecute = false
shutdown_verb = 0x0
initrd_timestamp = {realtime = 0, monotonic = 0}
userspace_timestamp = {realtime = 1427328950819373, monotonic = 19892013}
kernel_timestamp = {realtime = <optimized out>, monotonic = 0}
security_start_timestamp = {realtime = 1427328950821716, monotonic = 19894356}
security_finish_timestamp = {realtime = 1427328950822668, monotonic = 19895308}
systemd = "systemd"
skip_setup = false
j = <optimized out>
loaded_policy = <optimized out>
arm_reboot_watchdog = false
queue_default_job = <optimized out>
empty_etc = <optimized out>
switch_root_dir = 0x0
switch_root_init = 0x0
__func__ = "main"
__PRETTY_FUNCTION__ = "main"
More information about the Pkg-systemd-maintainers
mailing list