[Pkg-libvirt-maintainers] Bug#983871: error: internal error: failed to get cgroup backend for 'pathOfController'
Guido Günther
gg at godiug.net
Wed Nov 17 12:06:20 GMT 2021
Hi,
On Fri, Oct 22, 2021 at 12:35:17AM +0700, Dio Putra wrote:
> Hi, this bug just happened right in front of me (see my picture attachment).
> Fortunately, I was able to create a rebase patch to Debian Bullseye:
> https://listman.redhat.com/archives/libvir-list/2021-April/msg00756.html
>
> Here's my patch:
> >From ea7d0ca37cce76e1327945c4864b996d7fd6d2e6 Mon Sep 17 00:00:00 2001
Thanks. I've moved that to an MR for a point release update. Testing is
appreciated.
Cheers,
-- Guido
> Message-Id: <ea7d0ca37cce76e1327945c4864b996d7fd6d2e6.1618903455.git.mprivozn at redhat.com>
> From: Michal Privoznik <mprivozn at redhat.com>
> Date: Fri, 16 Apr 2021 16:39:14 +0200
> Subject: [PATCH] vircgroup: Fix virCgroupKillRecursive() wrt nested
> controllers
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> I've encountered the following bug, but only on Gentoo with
> systemd and CGroupsV2. I've started an LXC container successfully
> but destroying it reported the following error:
>
> error: Failed to destroy domain 'amd64'
> error: internal error: failed to get cgroup backend for 'pathOfController'
>
> Debugging showed that the CGroup hierarchy is full of surprises:
>
> /sys/fs/cgroup/machine.slice/machine-lxc\x2d861\x2damd64.scope/
> └── libvirt
> ├── dev-hugepages.mount
> ├── dev-mqueue.mount
> ├── init.scope
> ├── sys-fs-fuse-connections.mount
> ├── sys-kernel-config.mount
> ├── sys-kernel-debug.mount
> ├── sys-kernel-tracing.mount
> ├── system.slice
> │ ├── console-getty.service
> │ ├── dbus.service
> │ ├── system-getty.slice
> │ ├── system-modprobe.slice
> │ ├── systemd-journald.service
> │ ├── systemd-logind.service
> │ └── tmp.mount
> └── user.slice
>
> For comparison, here's the same container on recent Rawhide:
>
> /sys/fs/cgroup/machine.slice/machine-lxc\x2d13550\x2damd64.scope/
> └── libvirt
>
> Anyway, those nested directories should not be a problem, because
> virCgroupKillRecursiveInternal() removes them recursively, right?
> Sort of. The function really does remove nested directories, but
> it assumes that every directory has the same controller as the
> rest. Just take a look at virCgroupV2KillRecursive() - it gets
> 'Any' controller (the first one it found in ".scope") and then
> passes it to virCgroupKillRecursiveInternal().
>
> This assumption is not true though. The controllers found in
> ".scope" are the following:
>
> cpuset cpu io memory pids
>
> while "libvirt" has fewer:
>
> cpuset cpu io memory
>
> Up until now it's not a problem, because of how we order
> controllers internally - "cpu" is the first and thus picking
> "Any" controller returns just that. But the rest of the directories
> have no controllers; their "cgroup.controllers" is just empty.
>
> What fixes the bug is dropping the @controller argument from
> virCgroupKillRecursiveInternal() and letting each iteration
> pick its own controller.
>
> Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
> Reviewed-by: Pavel Hrdina <phrdina at redhat.com>
> ---
> src/util/vircgroup.c | 29 +++++++++++++++++++++++++----
> src/util/vircgrouppriv.h | 1 -
> src/util/vircgroupv1.c | 7 +------
> src/util/vircgroupv2.c | 7 +------
> 4 files changed, 27 insertions(+), 17 deletions(-)
>
> Signed-off-by: Dio Putra <dioput12 at gmail.com>
> ---
> --- libvirt-7.0.0.orig/src/util/vircgroup.c
> +++ libvirt-7.0.0/src/util/vircgroup.c
> @@ -1380,6 +1380,24 @@
> }
>
>
> +static int
> +virCgroupGetAnyController(virCgroup *cgroup)
> +{
> + size_t i;
> +
> + for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) {
> + if (!cgroup->backends[i])
> + continue;
> +
> + return cgroup->backends[i]->getAnyController(cgroup);
> + }
> +
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Unable to get any controller"));
> + return -1;
> +}
> +
> +
> int
> virCgroupPathOfController(virCgroupPtr group,
> unsigned int controller,
> @@ -2548,18 +2566,21 @@
> virCgroupKillRecursiveInternal(virCgroupPtr group,
> int signum,
> GHashTable *pids,
> - int controller,
> const char *taskFile,
> bool dormdir)
> {
> int rc;
> + int controller;
> bool killedAny = false;
> g_autofree char *keypath = NULL;
> g_autoptr(DIR) dp = NULL;
> struct dirent *ent;
> int direrr;
> - VIR_DEBUG("group=%p signum=%d pids=%p",
> - group, signum, pids);
> + VIR_DEBUG("group=%p signum=%d pids=%p taskFile=%s dormdir=%d",
> + group, signum, pids, taskFile, dormdir);
> +
> + if ((controller = virCgroupGetAnyController(group)) < 0)
> + return -1;
>
> if (virCgroupPathOfController(group, controller, "", &keypath) < 0)
> return -1;
> @@ -2593,7 +2614,7 @@
> return -1;
>
> if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids,
> - controller, taskFile, true)) < 0)
> + taskFile, true)) < 0)
> return -1;
> if (rc == 1)
> killedAny = true;
> --- libvirt-7.0.0.orig/src/util/vircgrouppriv.h
> +++ libvirt-7.0.0/src/util/vircgrouppriv.h
> @@ -128,6 +128,5 @@ int virCgroupRemoveRecursively(char *grp
> int virCgroupKillRecursiveInternal(virCgroupPtr group,
> int signum,
> GHashTable *pids,
> - int controller,
> const char *taskFile,
> bool dormdir);
> --- libvirt-7.0.0.orig/src/util/vircgroupv1.c
> +++ libvirt-7.0.0/src/util/vircgroupv1.c
> @@ -771,12 +771,7 @@ virCgroupV1KillRecursive(virCgroupPtr gr
> int signum,
> GHashTable *pids)
> {
> - int controller = virCgroupV1GetAnyController(group);
> -
> - if (controller < 0)
> - return -1;
> -
> - return virCgroupKillRecursiveInternal(group, signum, pids, controller,
> + return virCgroupKillRecursiveInternal(group, signum, pids,
> "tasks", false);
> }
>
> --- libvirt-7.0.0.orig/src/util/vircgroupv2.c
> +++ libvirt-7.0.0/src/util/vircgroupv2.c
> @@ -543,12 +543,7 @@ virCgroupV2KillRecursive(virCgroupPtr gr
> int signum,
> GHashTable *pids)
> {
> - int controller = virCgroupV2GetAnyController(group);
> -
> - if (controller < 0)
> - return -1;
> -
> - return virCgroupKillRecursiveInternal(group, signum, pids, controller,
> + return virCgroupKillRecursiveInternal(group, signum, pids,
> "cgroup.threads", false);
> }
>
>
> On Tue, 02 Mar 2021 15:37:38 +0100 Thorsten Glaser <tg at mirbsd.de> wrote:
> > Package: libvirt-daemon
> > Version: 7.0.0-2
> > Severity: important
> > X-Debbugs-Cc: tg at mirbsd.de
> >
> > After an upgrade+reboot I cannot start VMs *again* with some cgroup error:
> >
> > $ wirrsh start Netboot
> > error: Failed to start domain 'Netboot'
> > error: internal error: failed to get cgroup backend for 'pathOfController'
> >
> > To unconfuse:
> >
> > $ alias wirrsh
> > wirrsh='virsh -c qemu:///system'
> >
> > -- System Information:
> > Debian Release: bullseye/sid
> > APT prefers unreleased
> > APT policy: (500, 'unreleased'), (500, 'buildd-unstable'), (500, 'unstable'), (100, 'experimental')
> > Architecture: x32 (x86_64)
> > Foreign Architectures: i386, amd64
> >
> > Kernel: Linux 5.10.0-3-amd64 (SMP w/4 CPU threads)
> > Kernel taint flags: TAINT_FIRMWARE_WORKAROUND
> > Locale: LANG=C, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8), LANGUAGE not set
> > Shell: /bin/sh linked to /bin/lksh
> > Init: sysvinit (via /sbin/init)
> >
> > Versions of packages libvirt-daemon depends on:
> > ii libblkid1 2.36.1-7
> > ii libc6 2.31-9
> > ii libdevmapper1.02.1 2:1.02.175-2.1
> > ii libgcc-s1 10.2.1-6
> > ii libglib2.0-0 2.66.7-1
> > ii libnetcf1 1:0.2.8-1.1
> > ii libparted2 3.4-1
> > ii libpcap0.8 1.10.0-2
> > ii libpciaccess0 0.16-1
> > ii libselinux1 3.1-3
> > ii libudev1 247.3-1
> > ii libvirt-daemon-driver-qemu 7.0.0-2
> > ii libvirt0 7.0.0-2
> > ii libxml2 2.9.10+dfsg-6.3+b1
> >
> > Versions of packages libvirt-daemon recommends:
> > pn libvirt-daemon-driver-lxc <none>
> > pn libvirt-daemon-driver-vbox <none>
> > pn libvirt-daemon-driver-xen <none>
> > ii libxml2-utils 2.9.10+dfsg-6.3+b1
> > ii netcat-openbsd 1.217-3
> > ii qemu-system 1:5.2+dfsg-6
> >
> > Versions of packages libvirt-daemon suggests:
> > pn libvirt-daemon-driver-storage-gluster <none>
> > pn libvirt-daemon-driver-storage-iscsi-direct <none>
> > pn libvirt-daemon-driver-storage-rbd <none>
> > pn libvirt-daemon-driver-storage-zfs <none>
> > ii libvirt-daemon-system 7.0.0-2
> > pn numad <none>
> From ea7d0ca37cce76e1327945c4864b996d7fd6d2e6 Mon Sep 17 00:00:00 2001
> Message-Id: <ea7d0ca37cce76e1327945c4864b996d7fd6d2e6.1618903455.git.mprivozn at redhat.com>
> From: Michal Privoznik <mprivozn at redhat.com>
> Date: Fri, 16 Apr 2021 16:39:14 +0200
> Subject: [PATCH] vircgroup: Fix virCgroupKillRecursive() wrt nested
> controllers
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> I've encountered the following bug, but only on Gentoo with
> systemd and CGroupsV2. I've started an LXC container successfully
> but destroying it reported the following error:
>
> error: Failed to destroy domain 'amd64'
> error: internal error: failed to get cgroup backend for 'pathOfController'
>
> Debugging showed that the CGroup hierarchy is full of surprises:
>
> /sys/fs/cgroup/machine.slice/machine-lxc\x2d861\x2damd64.scope/
> └── libvirt
> ├── dev-hugepages.mount
> ├── dev-mqueue.mount
> ├── init.scope
> ├── sys-fs-fuse-connections.mount
> ├── sys-kernel-config.mount
> ├── sys-kernel-debug.mount
> ├── sys-kernel-tracing.mount
> ├── system.slice
> │ ├── console-getty.service
> │ ├── dbus.service
> │ ├── system-getty.slice
> │ ├── system-modprobe.slice
> │ ├── systemd-journald.service
> │ ├── systemd-logind.service
> │ └── tmp.mount
> └── user.slice
>
> For comparison, here's the same container on recent Rawhide:
>
> /sys/fs/cgroup/machine.slice/machine-lxc\x2d13550\x2damd64.scope/
> └── libvirt
>
> Anyway, those nested directories should not be a problem, because
> virCgroupKillRecursiveInternal() removes them recursively, right?
> Sort of. The function really does remove nested directories, but
> it assumes that every directory has the same controller as the
> rest. Just take a look at virCgroupV2KillRecursive() - it gets
> 'Any' controller (the first one it found in ".scope") and then
> passes it to virCgroupKillRecursiveInternal().
>
> This assumption is not true though. The controllers found in
> ".scope" are the following:
>
> cpuset cpu io memory pids
>
> while "libvirt" has fewer:
>
> cpuset cpu io memory
>
> Up until now it's not a problem, because of how we order
> controllers internally - "cpu" is the first and thus picking
> "Any" controller returns just that. But the rest of the directories
> have no controllers; their "cgroup.controllers" is just empty.
>
> What fixes the bug is dropping the @controller argument from
> virCgroupKillRecursiveInternal() and letting each iteration
> pick its own controller.
>
> Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
> Reviewed-by: Pavel Hrdina <phrdina at redhat.com>
> ---
> src/util/vircgroup.c | 29 +++++++++++++++++++++++++----
> src/util/vircgrouppriv.h | 1 -
> src/util/vircgroupv1.c | 7 +------
> src/util/vircgroupv2.c | 7 +------
> 4 files changed, 27 insertions(+), 17 deletions(-)
>
> Signed-off-by: Dio Putra <dioput12 at gmail.com>
> ---
> --- libvirt-7.0.0.orig/src/util/vircgroup.c
> +++ libvirt-7.0.0/src/util/vircgroup.c
> @@ -1380,6 +1380,24 @@
> }
>
>
> +static int
> +virCgroupGetAnyController(virCgroup *cgroup)
> +{
> + size_t i;
> +
> + for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) {
> + if (!cgroup->backends[i])
> + continue;
> +
> + return cgroup->backends[i]->getAnyController(cgroup);
> + }
> +
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Unable to get any controller"));
> + return -1;
> +}
> +
> +
> int
> virCgroupPathOfController(virCgroupPtr group,
> unsigned int controller,
> @@ -2548,18 +2566,21 @@
> virCgroupKillRecursiveInternal(virCgroupPtr group,
> int signum,
> GHashTable *pids,
> - int controller,
> const char *taskFile,
> bool dormdir)
> {
> int rc;
> + int controller;
> bool killedAny = false;
> g_autofree char *keypath = NULL;
> g_autoptr(DIR) dp = NULL;
> struct dirent *ent;
> int direrr;
> - VIR_DEBUG("group=%p signum=%d pids=%p",
> - group, signum, pids);
> + VIR_DEBUG("group=%p signum=%d pids=%p taskFile=%s dormdir=%d",
> + group, signum, pids, taskFile, dormdir);
> +
> + if ((controller = virCgroupGetAnyController(group)) < 0)
> + return -1;
>
> if (virCgroupPathOfController(group, controller, "", &keypath) < 0)
> return -1;
> @@ -2593,7 +2614,7 @@
> return -1;
>
> if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids,
> - controller, taskFile, true)) < 0)
> + taskFile, true)) < 0)
> return -1;
> if (rc == 1)
> killedAny = true;
> --- libvirt-7.0.0.orig/src/util/vircgrouppriv.h
> +++ libvirt-7.0.0/src/util/vircgrouppriv.h
> @@ -128,6 +128,5 @@ int virCgroupRemoveRecursively(char *grp
> int virCgroupKillRecursiveInternal(virCgroupPtr group,
> int signum,
> GHashTable *pids,
> - int controller,
> const char *taskFile,
> bool dormdir);
> --- libvirt-7.0.0.orig/src/util/vircgroupv1.c
> +++ libvirt-7.0.0/src/util/vircgroupv1.c
> @@ -771,12 +771,7 @@ virCgroupV1KillRecursive(virCgroupPtr gr
> int signum,
> GHashTable *pids)
> {
> - int controller = virCgroupV1GetAnyController(group);
> -
> - if (controller < 0)
> - return -1;
> -
> - return virCgroupKillRecursiveInternal(group, signum, pids, controller,
> + return virCgroupKillRecursiveInternal(group, signum, pids,
> "tasks", false);
> }
>
> --- libvirt-7.0.0.orig/src/util/vircgroupv2.c
> +++ libvirt-7.0.0/src/util/vircgroupv2.c
> @@ -543,12 +543,7 @@ virCgroupV2KillRecursive(virCgroupPtr gr
> int signum,
> GHashTable *pids)
> {
> - int controller = virCgroupV2GetAnyController(group);
> -
> - if (controller < 0)
> - return -1;
> -
> - return virCgroupKillRecursiveInternal(group, signum, pids, controller,
> + return virCgroupKillRecursiveInternal(group, signum, pids,
> "cgroup.threads", false);
> }
>
More information about the Pkg-libvirt-maintainers
mailing list