Bug#781210: systemd asserts on function cg_is_empty_recursive, crashes

Faidon Liambotis paravoid at debian.org
Thu Mar 26 02:50:41 GMT 2015


Package: systemd
Version: 215-12
Severity: critical

I've managed to reproducibly crash systemd:

# grep systemd /var/log/syslog | tail -3
Mar 26 01:02:15 curium systemd[1]: Assertion 'path' failed at ../src/shared/cgroup-util.c:913, function cg_is_empty_recursive().  Aborting.
Mar 26 01:02:15 curium systemd[1]: Caught <ABRT>, dumped core as pid 6916.
Mar 26 01:02:15 curium systemd[1]: Freezing execution.

After that, the system remains functioning (i.e. pid1 stays alive and
the kernel does not panic) but systemctl etc. do not respond ("Failed to
list units: Connection timed out") and the system as a whole is pretty
useless until a reboot.

This is hard to trigger as it happens under very specific conditions
plus a race, but I've managed to reproduce it five times already on two
different servers.

The gory details and steps to reproduce are over at #781209, but in
short:

- strongswan-starter ships an init script /etc/init.d/ipsec and a system
  unit file named "strongswan.service" but containing Alias=ipsec.service.

- strongswan-starter's postinst is buggy and calls "invoke-rc.d ipsec
  start" manually before the systemd unit is fully set up.

- This results into the ipsec daemons actually starting up in an
  ipsec.service cgroup, as evidenced by e.g. a systemctl status.

- A subsequent "systemctl restart strongswan" almost always results into
  the service becoming "inactive" and the processes under the
  ipsec.service cgroup being killed.

  Sometimes, though, the service gets into an "inactive (dead)" state
  but the processes from the (wrong) cgroup stay up. This possibly
  happens because systemd tries to set up a strongswan.service cgroup?

- At that point, the processes are orphaned and lost from systemd's
  control and are completely unmanageable by systemctl.
  
- Killing them by hand (e.g. via kill or "ipsec stop") crashes systemd.

A gdb "bt full" is attached.

Faidon
-------------- next part --------------
(gdb) bt full
#0  0x00007f8fb8b0d79b in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:37
        resultvar = 0
        pid = <optimized out>
#1  0x00007f8fb8f633d8 in crash.lto_priv.234 (sig=6) at ../src/core/main.c:158
        rl = {rlim_cur = 18446744073709551615, rlim_max = 18446744073709551615}
        sa = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0}, sa_mask = {__val = {0 <repeats 16 times>}}, sa_flags = 0, sa_restorer = 0x0}
        __func__ = "crash"
        __PRETTY_FUNCTION__ = "crash"
#2  <signal handler called>
No locals.
#3  0x00007f8fb878a107 in __GI_raise (sig=sig at entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
        resultvar = 0
        pid = 1
        selftid = 1
#4  0x00007f8fb878b4e8 in __GI_abort () at abort.c:89
        save_stage = 2
        act = {__sigaction_handler = {sa_handler = 0x7ffd56ee0ae4, sa_sigaction = 0x7ffd56ee0ae4}, sa_mask = {__val = {140255286606672, 140255286672192, 
              140726061894624, 140255287343296, 5990105739957488896, 140255286672192, 140255260941991, 140255260944944, 2, 1, 140255287343248, 913, 
              140255260573900, 140255263445632, 140255287195268, 140255286635808}}, sa_flags = -1165411504, sa_restorer = 0x7f8fba8a3b40}
        sigs = {__val = {32, 0 <repeats 15 times>}}
#5  0x00007f8fb8f9aed2 in log_assert_failed (text=text at entry=0x7f8fb901c736 "path", file=file at entry=0x7f8fb9019ea7 "../src/shared/cgroup-util.c", 
    line=line at entry=913, func=func at entry=0x7f8fb901aa30 <__PRETTY_FUNCTION__.8851> "cg_is_empty_recursive") at ../src/shared/log.c:709
No locals.
#6  0x00007f8fb8f6cc8f in cg_is_empty_recursive.constprop.53 (path=0x0, ignore_self=ignore_self at entry=true, controller=<synthetic pointer>)
    at ../src/shared/cgroup-util.c:913
        d = 0x0
        fn = 0x7f8fba8a4d90 "\001"
        r = <optimized out>
#7  0x00007f8fb8ff21c3 in manager_notify_cgroup_empty (cgroup=<optimized out>, m=0x7f8fba893b50) at ../src/core/cgroup.c:978
        u = 0x7f8fba894d30
        r = <optimized out>
#8  signal_agent_released (bus=<optimized out>, message=0x7f8fba8a3b40, userdata=0x7f8fba893b50, error=<optimized out>) at ../src/core/dbus.c:90
        m = 0x7f8fba893b50
        cgroup = 0x7f8fba923684 "/system.slice/ipsec.service"
        r = <optimized out>
        __PRETTY_FUNCTION__ = "signal_agent_released"
        __func__ = "signal_agent_released"
#9  0x00007f8fb9009137 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba90f260, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:299
        error_buffer = {name = 0x0, message = 0x0, _need_free = 0}
        slot = 0x7f8fba947890
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba90f260
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#10 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba9242c0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba9242c0
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#11 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba94b410, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
        found = <optimized out>
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba94b410
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#12 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba924040, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba924040
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#13 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba93d2d0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
        found = <optimized out>
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba93d2d0
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#14 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba9237a0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba9237a0
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#15 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba930bb0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
        found = <optimized out>
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba930bb0
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#16 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba91eb70, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba91eb70
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#17 0x00007f8fb9008d88 in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba91e8d0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:368
        found = <optimized out>
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba91e8d0
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#18 0x00007f8fb9008dae in bus_match_run (bus=0x7f8fba8c2970, node=0x7f8fba922ac0, m=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/bus-match.c:391
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba922ac0
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#19 0x00007f8fb9008c9e in bus_match_run (bus=bus at entry=0x7f8fba8c2970, node=node at entry=0x7f8fba8c29f0, m=m at entry=0x7f8fba8a3b40)
    at ../src/libsystemd/sd-bus/bus-match.c:266
        test_str = <optimized out>
        test_u8 = <optimized out>
        r = <optimized out>
        m = 0x7f8fba8a3b40
        node = 0x7f8fba8c29f0
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "bus_match_run"
#20 0x00007f8fb8f8eb48 in process_match.lto_priv.252 (bus=bus at entry=0x7f8fba8c2970, m=m at entry=0x7f8fba8a3b40) at ../src/libsystemd/sd-bus/sd-bus.c:2261
        r = <optimized out>
        __PRETTY_FUNCTION__ = "process_match"
#21 0x00007f8fb8f792bb in process_message (m=<optimized out>, bus=0x7f8fba8c2970) at ../src/libsystemd/sd-bus/sd-bus.c:2386
        r = 0
#22 process_running (priority=0, ret=0x0, hint_priority=false, bus=0x7f8fba8c2970) at ../src/libsystemd/sd-bus/sd-bus.c:2436
        m = <optimized out>
        r = <optimized out>
#23 bus_process_internal.constprop.172 (bus=0x7f8fba8c2970, ret=0x0, priority=0, hint_priority=false) at ../src/libsystemd/sd-bus/sd-bus.c:2619
        _dont_destroy_bus = 0x7f8fba8c2970
#24 0x00007f8fb8f64061 in sd_bus_process (ret=0x0, bus=<optimized out>) at ../src/libsystemd/sd-bus/sd-bus.c:2638
No locals.
#25 io_callback (s=<optimized out>, fd=<optimized out>, revents=<optimized out>, userdata=<optimized out>) at ../src/libsystemd/sd-bus/sd-bus.c:2897
        bus = <optimized out>
        __PRETTY_FUNCTION__ = "io_callback"
#26 0x00007f8fb8f66fe0 in source_dispatch (s=0x7f8fba894530) at ../src/libsystemd/sd-event/sd-event.c:2016
        __PRETTY_FUNCTION__ = "source_dispatch"
        __func__ = "source_dispatch"
#27 0x00007f8fb8f6822f in sd_event_run (e=0x7f8fba893f80, timeout=<optimized out>) at ../src/libsystemd/sd-event/sd-event.c:2314
        ev_queue = <optimized out>
        ev_queue_max = <optimized out>
        p = <optimized out>
        r = 0
        i = <optimized out>
        m = 1
        timedout = false
        __PRETTY_FUNCTION__ = "sd_event_run"
#28 0x00007f8fb9011211 in manager_loop (m=0x7f8fba893b50) at ../src/core/manager.c:1912
        wait_usec = 1
        rl = {interval = 1000000, begin = 919722059, burst = 50000, num = 6}
        __PRETTY_FUNCTION__ = "manager_loop"
        __func__ = "manager_loop"
#29 0x00007f8fb8f5f22b in main (argc=1, argv=<optimized out>) at ../src/core/main.c:1743
        m = 0x7f8fba893b50
        r = <optimized out>
        retval = 1
        before_startup = <optimized out>
        after_startup = <optimized out>
        timespan = "f\336\b`\330qX\034Q\343\061\256\272\343\222|s\302x\356.cx\204\221U_\025J=G\363BE\325\354/cx\204\060cx\204\221x\251?\373\062\000\362?\355s\240sU\241\063S\247\273"
        fds = 0x0
        reexecute = false
        shutdown_verb = 0x0
        initrd_timestamp = {realtime = 0, monotonic = 0}
        userspace_timestamp = {realtime = 1427328950819373, monotonic = 19892013}
        kernel_timestamp = {realtime = <optimized out>, monotonic = 0}
        security_start_timestamp = {realtime = 1427328950821716, monotonic = 19894356}
        security_finish_timestamp = {realtime = 1427328950822668, monotonic = 19895308}
        systemd = "systemd"
        skip_setup = false
        j = <optimized out>
        loaded_policy = <optimized out>
        arm_reboot_watchdog = false
        queue_default_job = <optimized out>
        empty_etc = <optimized out>
        switch_root_dir = 0x0
        switch_root_init = 0x0
        __func__ = "main"
        __PRETTY_FUNCTION__ = "main"



More information about the Pkg-systemd-maintainers mailing list