Upgrade kubevirt to v0.59.0 (#5470)

- Upgrade kubevirt to v0.59.0
- Add patches specific to Nexus Operator Edge.
These are yet to be upstreamed, but since they are critical for the June GA release,
we are patching the sources here.
Author: kanikanema, committed via GitHub on 2023-05-12 21:17:27 +05:30
parent e9c15bde59
commit 1a44ec84b4
7 changed files with 283 additions and 131 deletions

SPECS/kubevirt/Allocate-2-cpu-for-the-emulator-thread.patch

@@ -0,0 +1,34 @@
From d2348697b0f398e1950dd9a0f1442d1bba47dec9 Mon Sep 17 00:00:00 2001
From: Kanika Nema <kanikanema@microsoft.com>
Date: Fri, 12 May 2023 14:59:19 +0000
Subject: [PATCH] Allocate 2 cpu for the emulator thread instead of one
This ensures SMT alignment when the cpu-manager policy
full-pcpus-only is used, preventing a dedicated cpu
from sharing time with the emulator thread/virt-launcher
processes.
---
pkg/virt-controller/services/renderresources.go | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/pkg/virt-controller/services/renderresources.go b/pkg/virt-controller/services/renderresources.go
index 46b242932..d75354bc5 100644
--- a/pkg/virt-controller/services/renderresources.go
+++ b/pkg/virt-controller/services/renderresources.go
@@ -194,9 +194,11 @@ func WithCPUPinning(cpu *v1.CPU) ResourceRendererOption {
}
}
- // allocate 1 more pcpu if IsolateEmulatorThread request
+ // allocate 2 more pcpu if IsolateEmulatorThread request
+ // the extra cpu ensures hyperthread SMT cpu alignment when cpu-manager
+ // policy full-pcpus-only is used.
if cpu.IsolateEmulatorThread {
- emulatorThreadCPU := resource.NewQuantity(1, resource.BinarySI)
+ emulatorThreadCPU := resource.NewQuantity(2, resource.BinarySI)
limits := renderer.calculatedLimits[k8sv1.ResourceCPU]
limits.Add(*emulatorThreadCPU)
renderer.vmLimits[k8sv1.ResourceCPU] = limits
--
2.25.1
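
For context on the patch above: with kubelet's static cpu-manager and the full-pcpus-only policy option, a pod is rejected with an SMTAlignmentError if its exclusive-CPU request is not a multiple of the SMT width (2 on typical hyperthreaded hosts). Below is a minimal sketch of the accounting the patch changes; podCPULimit is a hypothetical model of what WithCPUPinning computes, not the KubeVirt code itself.

```
package main

import "fmt"

// podCPULimit models (hypothetically, outside KubeVirt) the arithmetic in
// renderresources.go: dedicated guest vCPUs plus the pCPUs reserved for the
// emulator thread. The patch raises the overhead from 1 to 2 so the total
// stays even on SMT-2 hosts.
func podCPULimit(dedicatedVCPUs int64, isolateEmulatorThread bool) int64 {
	const emulatorThreadOverhead = 2 // was 1 before the patch
	if isolateEmulatorThread {
		return dedicatedVCPUs + emulatorThreadOverhead
	}
	return dedicatedVCPUs
}

func main() {
	// A 4-vCPU guest now requests 6 CPUs (three whole physical cores on an
	// SMT-2 node). With the old overhead it would request 5, which
	// full-pcpus-only rejects because 5 is not a multiple of 2.
	fmt.Println(podCPULimit(4, true))  // 6
	fmt.Println(podCPULimit(4, false)) // 4
}
```

The trade-off is one extra dedicated pCPU per VM that runs with an isolated emulator thread.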

SPECS/kubevirt/Cleanup-housekeeping-cgroup-on-vm-del.patch

@@ -0,0 +1,230 @@
From 79847c89951f8da3772760e50406924b49a0ac6a Mon Sep 17 00:00:00 2001
From: Kanika Nema <kanikanema@microsoft.com>
Date: Fri, 12 May 2023 14:57:03 +0000
Subject: [PATCH] Cleanup housekeeping cgroup on vm delete
Move cgroup.threads back to the parent cgroup and rmdir the
housekeeping cgroup before the vm is deleted.
This works around a bug in runc where it cannot read
housekeeping/cgroup.procs, which stops the vm from being deleted.
---
pkg/virt-handler/cgroup/cgroup.go | 6 +++
pkg/virt-handler/cgroup/cgroup_v1_manager.go | 19 +++++++++
pkg/virt-handler/cgroup/cgroup_v2_manager.go | 19 +++++++++
pkg/virt-handler/cgroup/util.go | 33 +++++++++++++++
pkg/virt-handler/vm.go | 44 ++++++++++++++++++++
5 files changed, 121 insertions(+)
diff --git a/pkg/virt-handler/cgroup/cgroup.go b/pkg/virt-handler/cgroup/cgroup.go
index b0b28a046..052b5aa96 100644
--- a/pkg/virt-handler/cgroup/cgroup.go
+++ b/pkg/virt-handler/cgroup/cgroup.go
@@ -63,11 +63,17 @@ type Manager interface {
// Create new child cgroup
CreateChildCgroup(name string, subSystem string) error
+ // Delete child cgroup
+ DeleteChildCgroup(name string, subSystem string) error
+
// Attach TID to cgroup
AttachTID(subSystem string, subCgroup string, tid int) error
// Get list of threads attached to cgroup
GetCgroupThreads() ([]int, error)
+
+ // Get list of threads attached to a child cgroup
+ GetChildCgroupThreads(childCgroup string) ([]int, error)
}
// This is here so that mockgen would create a mock out of it. That way we would have a mocked runc manager.
diff --git a/pkg/virt-handler/cgroup/cgroup_v1_manager.go b/pkg/virt-handler/cgroup/cgroup_v1_manager.go
index 073d7a820..c5ee7706b 100644
--- a/pkg/virt-handler/cgroup/cgroup_v1_manager.go
+++ b/pkg/virt-handler/cgroup/cgroup_v1_manager.go
@@ -183,6 +183,21 @@ func init_cgroup(groupPath string, newCgroupName string, subSystem string) (err
return nil
}
+func (v *v1Manager) DeleteChildCgroup(name string, subSystem string) error {
+ subSysPath, err := v.GetBasePathToHostSubsystem(subSystem)
+ if err != nil {
+ return err
+ }
+
+ newGroupPath := filepath.Join(subSysPath, name)
+
+ err = os.Remove(newGroupPath)
+ if err != nil {
+ log.Log.Warningf("rmdir %s failed", newGroupPath)
+ }
+ return nil
+}
+
func (v *v1Manager) CreateChildCgroup(name string, subSystem string) error {
subSysPath, err := v.GetBasePathToHostSubsystem(subSystem)
if err != nil {
@@ -201,6 +216,10 @@ func (v *v1Manager) GetCgroupThreads() ([]int, error) {
return getCgroupThreadsHelper(v, "tasks")
}
+func (v *v1Manager) GetChildCgroupThreads(childCgroup string) ([]int, error) {
+ return getChildCgroupThreadsHelper(v, childCgroup, "cgroup.threads")
+}
+
func (v *v1Manager) SetCpuSet(subcgroup string, cpulist []int) error {
return setCpuSetHelper(v, subcgroup, cpulist)
}
diff --git a/pkg/virt-handler/cgroup/cgroup_v2_manager.go b/pkg/virt-handler/cgroup/cgroup_v2_manager.go
index 4b8bf63f0..6dbaf3189 100644
--- a/pkg/virt-handler/cgroup/cgroup_v2_manager.go
+++ b/pkg/virt-handler/cgroup/cgroup_v2_manager.go
@@ -75,6 +75,21 @@ func (v *v2Manager) GetCpuSet() (string, error) {
return getCpuSetPath(v, "cpuset.cpus.effective")
}
+func (v *v2Manager) DeleteChildCgroup(name string, subSystem string) error {
+ subSysPath, err := v.GetBasePathToHostSubsystem(subSystem)
+ if err != nil {
+ return err
+ }
+
+ newGroupPath := filepath.Join(subSysPath, name)
+
+ err = os.Remove(newGroupPath)
+ if err != nil {
+ log.Log.Warningf("rmdir %s failed", newGroupPath)
+ }
+ return nil
+}
+
func (v *v2Manager) CreateChildCgroup(name string, subSystem string) error {
subSysPath, err := v.GetBasePathToHostSubsystem(subSystem)
if err != nil {
@@ -140,6 +155,10 @@ func (v *v2Manager) GetCgroupThreads() ([]int, error) {
return getCgroupThreadsHelper(v, "cgroup.threads")
}
+func (v *v2Manager) GetChildCgroupThreads(childCgroup string) ([]int, error) {
+ return getChildCgroupThreadsHelper(v, childCgroup, "cgroup.threads")
+}
+
func (v *v2Manager) SetCpuSet(subcgroup string, cpulist []int) error {
return setCpuSetHelper(v, subcgroup, cpulist)
}
diff --git a/pkg/virt-handler/cgroup/util.go b/pkg/virt-handler/cgroup/util.go
index f088dc4ce..820671f8f 100644
--- a/pkg/virt-handler/cgroup/util.go
+++ b/pkg/virt-handler/cgroup/util.go
@@ -222,6 +222,39 @@ func getCgroupThreadsHelper(manager Manager, fname string) ([]int, error) {
return tIds, nil
}
+func getChildCgroupThreadsHelper(manager Manager, childCgroup string, fname string) ([]int, error) {
+ tIds := make([]int, 0, 10)
+
+ subSysPath, err := manager.GetBasePathToHostSubsystem("cpuset")
+ if err != nil {
+ return nil, err
+ }
+
+ fh, err := os.Open(filepath.Join(subSysPath, childCgroup, fname))
+ if err != nil {
+ log.Log.Warningf("no %s in path %s/%s - %v", fname, subSysPath, childCgroup, err)
+ // file didn't exist, so effectively the thread list is empty; return that and don't error.
+ return tIds, nil
+ }
+ defer fh.Close()
+
+ scanner := bufio.NewScanner(fh)
+ for scanner.Scan() {
+ line := scanner.Text()
+ intVal, err := strconv.Atoi(line)
+ if err != nil {
+ log.Log.Errorf("error converting %s: %v", line, err)
+ return nil, err
+ }
+ tIds = append(tIds, intVal)
+ }
+ if err := scanner.Err(); err != nil {
+ log.Log.Errorf("error reading %s: %v", fname, err)
+ return nil, err
+ }
+ return tIds, nil
+}
+
// set cpus "cpusList" on the allowed CPUs. Optionally on a subcgroup of
// the pods control group (if subcgroup != nil).
func setCpuSetHelper(manager Manager, subCgroup string, cpusList []int) error {
diff --git a/pkg/virt-handler/vm.go b/pkg/virt-handler/vm.go
index 698133860..d0b0051d2 100644
--- a/pkg/virt-handler/vm.go
+++ b/pkg/virt-handler/vm.go
@@ -2135,6 +2135,13 @@ func (d *VirtualMachineController) processVmShutdown(vmi *v1.VirtualMachineInsta
return err
}
+ if vmi.IsCPUDedicated() && vmi.Spec.Domain.CPU.IsolateEmulatorThread {
+ err = d.removeHousekeepingCgroup(vmi)
+ if err != nil {
+ return err
+ }
+ }
+
// Only attempt to gracefully shutdown if the domain has the ACPI feature enabled
if isACPIEnabled(vmi, domain) {
if expired, timeLeft := d.hasGracePeriodExpired(domain); !expired {
@@ -2215,6 +2222,13 @@ func (d *VirtualMachineController) processVmDelete(vmi *v1.VirtualMachineInstanc
// pending deletion.
d.recorder.Event(vmi, k8sv1.EventTypeNormal, v1.Deleted.String(), VMISignalDeletion)
+ if vmi.IsCPUDedicated() && vmi.Spec.Domain.CPU.IsolateEmulatorThread {
+ err = d.removeHousekeepingCgroup(vmi)
+ if err != nil {
+ return err
+ }
+ }
+
err = client.DeleteDomain(vmi)
if err != nil && !cmdclient.IsDisconnected(err) {
// Only report err if it wasn't the result of a disconnect.
@@ -2688,6 +2702,36 @@ func (d *VirtualMachineController) configureHousekeepingCgroup(vmi *v1.VirtualMa
return nil
}
+func (d *VirtualMachineController) removeHousekeepingCgroup(vmi *v1.VirtualMachineInstance) error {
+ cgroupManager, err := cgroup.NewManagerFromVM(vmi)
+ if err != nil {
+ return err
+ }
+
+ log.Log.Object(vmi).Info("Getting housekeeping cgroup.threads")
+ tids, err := cgroupManager.GetChildCgroupThreads("housekeeping")
+ if err != nil {
+ return err
+ }
+
+ log.Log.Object(vmi).Infof("Moving housekeeping threads back to parent cgroup. ids: %v", tids)
+ for _, tid := range tids {
+ err = cgroupManager.AttachTID("cpuset", "", tid)
+ if err != nil {
+ log.Log.Object(vmi).Errorf("Error attaching tid %d: %v", tid, err.Error())
+ return err
+ }
+ }
+
+ log.Log.Object(vmi).Info("Deleting housekeeping cgroup")
+ err = cgroupManager.DeleteChildCgroup("housekeeping", "cpuset")
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
func (d *VirtualMachineController) vmUpdateHelperDefault(origVMI *v1.VirtualMachineInstance, domainExists bool) error {
client, err := d.getLauncherClient(origVMI)
if err != nil {
--
2.25.1
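
The ordering inside removeHousekeepingCgroup above is the essential part: the kernel refuses to rmdir a cgroup that still has member threads (the os.Remove would fail with EBUSY), so every TID is first written back into the parent's threads file. Below is a standalone sketch of that sequence, assuming cgroup v2 semantics (cgroup.threads; v1 uses tasks) and purely illustrative paths; it is not KubeVirt's cgroup.Manager API.

```
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

// drainAndRemove sketches the teardown order the patch adds (a hypothetical
// helper): writing a TID into a cgroup's threads file is how the kernel moves
// a thread between cgroups, and only an empty cgroup directory can be
// removed, so the drain must happen before the rmdir.
func drainAndRemove(parent, child string, tids []int) error {
	dest := filepath.Join(parent, "cgroup.threads")
	for _, tid := range tids {
		if err := os.WriteFile(dest, []byte(strconv.Itoa(tid)), 0644); err != nil {
			return fmt.Errorf("moving tid %d to parent: %w", tid, err)
		}
	}
	// The child cgroup is now empty, so rmdir can succeed; while it still
	// held threads this call would fail with EBUSY.
	return os.Remove(filepath.Join(parent, child))
}

func main() {
	// Illustrative pod cgroup path only; the real path is resolved per VMI.
	parent := "/sys/fs/cgroup/kubepods.slice/example-pod"
	if err := drainAndRemove(parent, "housekeeping", []int{12345}); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```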

SPECS/kubevirt/disks-images-provider.yaml

@@ -22,12 +22,12 @@ spec:
serviceAccountName: kubevirt-testing
containers:
- name: target
-image: quay.io/kubevirt/disks-images-provider:v0.58.0
+image: quay.io/kubevirt/disks-images-provider:v0.59.0
imagePullPolicy: Always
lifecycle:
preStop:
exec:
-command: ["/bin/sh","-c","source /etc/bashrc && chroot /host losetup -d ${LOOP_DEVICE} && chroot /host umount ${LOOP_DEVICE_HP} && chroot /host losetup -d ${LOOP_DEVICE_HP}"]
+command: ["/bin/sh","-c","source /etc/bashrc && chroot /host umount ${LOOP_DEVICE_HP} && chroot /host losetup -d ${LOOP_DEVICE_HP}"]
volumeMounts:
- name: images
mountPath: /hostImages

SPECS/kubevirt/fgetxattr-for-relabel.patch

@@ -1,117 +0,0 @@
commit 28d947c6c487166c8d41b785a8a96a814ce1af81
Author: Roman Mohr <rmohr@google.com>
Date: Tue Aug 23 19:31:36 2022 +0200
Use fgetxattr to get selinux labels
Instead of using lgetxattr on a fd path (/proc/self/fd/<num>), directly
use `fgetxattr`.
It turns out that `lgetxattr` does not return a consistent result on all
kernel versions when used on a file descriptor path.
This is often not an issue. It would just mean that virt-handler would
label a few devices in its namespaces on every start, even if it would
not have to. But on some operating systems (e.g. CentOS 8, but not
CentOS 8 Stream) we then fail on the unneeded relabeling attempt.
Before:
lgetxattr sometimes returns weird results on a file descriptor path:
```
Error: error relabeling file /proc/self/fd/7 from label system_u:system_r:spc_t:s0 to label system_u:object_r:container_file_t:s0. Reason: operation not supported
[...]
error relabeling file /proc/self/fd/7 from label system_u:system_r:spc_t:s0 to label system_u:object_r:container_file_t:s0. Reason: operation not supported
```
After:
Successful detection of matching labels results in no action:
```
root@virt-handler-gk5vb:~# ./virt-chroot selinux relabel system_u:object_r:container_file_t:s0 /dev/net/tun
```
Mismatches are still detected as expected:
```
root@virt-handler-gk5vb:~# ./virt-chroot selinux relabel system_u:object_r:container_file_t:s1 /dev/net/tun
Error: error relabeling file /proc/self/fd/7 from label system_u:object_r:container_file_t:s0 to label system_u:object_r:container_file_t:s1. Reason: operation not supported
[...]
error relabeling file /proc/self/fd/7 from label system_u:object_r:container_file_t:s0 to label system_u:object_r:container_file_t:s1. Reason: operation not supported
```
Signed-off-by: Roman Mohr <rmohr@google.com>
diff --git a/cmd/virt-chroot/BUILD.bazel b/cmd/virt-chroot/BUILD.bazel
index fd26041a0..250a25bf2 100644
--- a/cmd/virt-chroot/BUILD.bazel
+++ b/cmd/virt-chroot/BUILD.bazel
@@ -17,7 +17,6 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
- "//vendor/github.com/opencontainers/selinux/go-selinux:go_default_library",
"//vendor/github.com/spf13/cobra:go_default_library",
"//vendor/github.com/vishvananda/netlink:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
diff --git a/cmd/virt-chroot/selinux.go b/cmd/virt-chroot/selinux.go
index 29cc03712..c75962017 100644
--- a/cmd/virt-chroot/selinux.go
+++ b/cmd/virt-chroot/selinux.go
@@ -7,7 +7,6 @@ import (
"os"
"path/filepath"
- "github.com/opencontainers/selinux/go-selinux"
"github.com/spf13/cobra"
"golang.org/x/sys/unix"
@@ -67,10 +66,6 @@ func RelabelCommand() *cobra.Command {
defer fd.Close()
filePath := fd.SafePath()
- currentFileLabel, err := selinux.FileLabel(filePath)
- if err != nil {
- return fmt.Errorf("could not retrieve label of file %s. Reason: %v", filePath, err)
- }
writeableFD, err := os.OpenFile(filePath, os.O_APPEND|unix.S_IWRITE, os.ModePerm)
if err != nil {
@@ -78,6 +73,11 @@ func RelabelCommand() *cobra.Command {
}
defer writeableFD.Close()
+ currentFileLabel, err := getLabel(writeableFD)
+ if err != nil {
+ return fmt.Errorf("faild to get selinux label for file %v: %v", filePath, err)
+ }
+
if currentFileLabel != label {
if err := unix.Fsetxattr(int(writeableFD.Fd()), xattrNameSelinux, []byte(label), 0); err != nil {
return fmt.Errorf("error relabeling file %s with label %s. Reason: %v", filePath, label, err)
@@ -90,3 +90,22 @@ func RelabelCommand() *cobra.Command {
relabelCommad.Flags().StringVar(&root, "root", "/", "safe root path which will be prepended to passed in files")
return relabelCommad
}
+
+func getLabel(file *os.File) (string, error) {
+ // let's first find out the actual buffer size
+ var buffer []byte
+ labelLength, err := unix.Fgetxattr(int(file.Fd()), xattrNameSelinux, buffer)
+ if err != nil {
+ return "", fmt.Errorf("error reading fgetxattr: %v", err)
+ }
+ // now ask with the needed size
+ buffer = make([]byte, labelLength)
+ labelLength, err = unix.Fgetxattr(int(file.Fd()), xattrNameSelinux, buffer)
+ if err != nil {
+ return "", fmt.Errorf("error reading fgetxattr: %v", err)
+ }
+ if labelLength > 0 && buffer[labelLength-1] == '\x00' {
+ labelLength = labelLength - 1
+ }
+ return string(buffer[:labelLength]), nil
+}
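
This patch file is deleted because the fix (upstream PR #8594 for issue #8544, per the comment removed from the spec below) ships in the v0.59.0 sources. The getLabel helper it carried uses a common xattr idiom: call fgetxattr with a nil buffer to learn the attribute size, then again with a sized buffer. A minimal standalone sketch of the same idiom with golang.org/x/sys/unix follows; the /dev/net/tun path is just an example.

```
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// selinuxLabel reads the security.selinux xattr via the descriptor itself.
// Unlike lgetxattr on a /proc/self/fd/<n> path, fgetxattr behaves
// consistently across kernel versions.
func selinuxLabel(f *os.File) (string, error) {
	const attr = "security.selinux"
	// First call with a nil buffer returns the attribute size.
	n, err := unix.Fgetxattr(int(f.Fd()), attr, nil)
	if err != nil {
		return "", fmt.Errorf("sizing xattr: %v", err)
	}
	buf := make([]byte, n)
	n, err = unix.Fgetxattr(int(f.Fd()), attr, buf)
	if err != nil {
		return "", fmt.Errorf("reading xattr: %v", err)
	}
	// The label is stored NUL-terminated; trim the trailing NUL.
	if n > 0 && buf[n-1] == 0 {
		n--
	}
	return string(buf[:n]), nil
}

func main() {
	f, err := os.Open("/dev/net/tun") // example device from the commit message
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	defer f.Close()
	fmt.Println(selinuxLabel(f))
}
```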

SPECS/kubevirt/kubevirt.signatures.json

@@ -1,6 +1,6 @@
{
"Signatures": {
"disks-images-provider.yaml": "02beaa28c9d39e4d677569cc5e0fb9d0c1251a5749e2e93afb32d35e69def63c",
"kubevirt-0.58.0.tar.gz": "e54c74af63180e785e998630a73d4c85f56e3f74bca44c76318d12aacb8400e0"
"disks-images-provider.yaml": "6da2f333ad97e2c6e16272bec70a1747d11505377e5aa27ccae550f00b368c43",
"kubevirt-0.59.0.tar.gz": "4f9557e9102163e2d9c3ca237cb40cc2e88f05d03b4e0d97745b2a09e5ab3a78"
}
}
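
For reviewers unfamiliar with this file: the Mariner build tooling verifies each source artifact against the SHA-256 digest recorded here, so an upgrade has to refresh both hashes. A minimal sketch of recomputing such a digest locally follows; it assumes the tarball sits in the working directory and is illustrative, not the toolkit's own verification code.

```
package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"os"
)

// fileSHA256 returns the hex digest in the same form as the signatures file.
// Run against a freshly downloaded kubevirt-0.59.0.tar.gz, the output should
// match the value recorded above.
func fileSHA256(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", h.Sum(nil)), nil
}

func main() {
	sum, err := fileSHA256("kubevirt-0.59.0.tar.gz")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println(sum)
}
```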

SPECS/kubevirt/kubevirt.spec

@@ -19,8 +19,8 @@
%global debug_package %{nil}
Summary: Container native virtualization
Name: kubevirt
-Version: 0.58.0
-Release: 7%{?dist}
+Version: 0.59.0
+Release: 2%{?dist}
License: ASL 2.0
Vendor: Microsoft Corporation
Distribution: Mariner
@@ -28,8 +28,10 @@ Group: System/Management
URL: https://github.com/kubevirt/kubevirt
Source0: https://github.com/kubevirt/kubevirt/archive/refs/tags/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
Source1: disks-images-provider.yaml
-# Upstream patch to fix issue #8544, PR #8594
-Patch0: fgetxattr-for-relabel.patch
+# Nexus team needs these to-be-upstreamed patches for the operator Edge to work
+# correctly.
+Patch0: Cleanup-housekeeping-cgroup-on-vm-del.patch
+Patch1: Allocate-2-cpu-for-the-emulator-thread.patch
BuildRequires: glibc-devel
BuildRequires: glibc-static >= 2.35-3%{?dist}
BuildRequires: golang
@@ -143,7 +145,7 @@ install -p -m 0755 cmd/virt-launcher/node-labeller/node-labeller.sh %{buildroot}
# virt-launcher configurations
mkdir -p %{buildroot}%{_datadir}/kube-virt/virt-launcher
install -p -m 0644 cmd/virt-launcher/qemu.conf %{buildroot}%{_datadir}/kube-virt/virt-launcher/
-install -p -m 0644 cmd/virt-launcher/libvirtd.conf %{buildroot}%{_datadir}/kube-virt/virt-launcher/
+install -p -m 0644 cmd/virt-launcher/virtqemud.conf %{buildroot}%{_datadir}/kube-virt/virt-launcher/
install -p -m 0644 cmd/virt-launcher/nsswitch.conf %{buildroot}%{_datadir}/kube-virt/virt-launcher/
@@ -153,9 +155,6 @@ install -p -m 0644 cmd/virt-handler/virt_launcher.cil %{buildroot}/
# Install network stuff
mkdir -p %{buildroot}%{_datadir}/kube-virt/virt-handler
install -p -m 0644 cmd/virt-handler/nsswitch.conf %{buildroot}%{_datadir}/kube-virt/virt-handler/
-install -p -m 0644 cmd/virt-handler/ipv4-nat.nft %{buildroot}%{_datadir}/kube-virt/virt-handler/
-install -p -m 0644 cmd/virt-handler/ipv6-nat.nft %{buildroot}%{_datadir}/kube-virt/virt-handler/
%files virtctl
%license LICENSE
@@ -211,6 +210,12 @@ install -p -m 0644 cmd/virt-handler/ipv6-nat.nft %{buildroot}%{_datadir}/kube-vi
%{_bindir}/virt-tests
%changelog
+* Fri May 12 2023 Kanika Nema <kanikanema@microsoft.com> - 0.59.0-2
+- Patch 0.59.0 with Operator Nexus patches
+* Fri May 05 2023 Kanika Nema <kanikanema@microsoft.com> - 0.59.0-1
+- Upgrade to v0.59.0
* Wed Apr 05 2023 CBL-Mariner Servicing Account <cblmargh@microsoft.com> - 0.58.0-7
- Bump release to rebuild with go 1.19.8

cgmanifest.json

@@ -8351,8 +8351,8 @@
"type": "other",
"other": {
"name": "kubevirt",
"version": "0.58.0",
"downloadUrl": "https://github.com/kubevirt/kubevirt/archive/refs/tags/v0.58.0.tar.gz"
"version": "0.59.0",
"downloadUrl": "https://github.com/kubevirt/kubevirt/archive/refs/tags/v0.59.0.tar.gz"
}
}
},