From 6f6e8f2d3ec5291547da26c0441cf2728b3c8879 Mon Sep 17 00:00:00 2001
From: Christian Pointner
Date: Tue, 30 Aug 2022 00:04:35 +0200
Subject: kubernetes: add new intel-gpu device-plugin addon

---
Review notes (free-form area between `---` and the diffstat; not part of the
commit message):

* Adds the role kubernetes/addons/intel-gpu-device-plugin. Its tasks are
  delegated to the first host of `_kubernetes_primary_controlplane_node_`,
  render `config.<version>.yml.j2` to
  /etc/kubernetes/addons/intel-gpu-device-plugin/config.yml and run
  `kubectl apply` only when `kubectl diff` exits non-zero.
* Also fixes the copy-pasted task name in the cert-manager role.
* Line breaks and leading whitespace were reconstructed from a
  whitespace-collapsed copy of this patch. Hunk line counts match the hunk
  headers, but the indentation depth of context lines in pre-existing files
  (dan/k8s-emc.yml, inventory/group_vars/k8s-emc/vars.yml) is inferred --
  TODO confirm against the target tree; `git apply --ignore-whitespace` may be
  needed.
* Changed relative to the recorded commit (all count-neutral):
  `check_mode: no` -> `check_mode: false`; the version scalar is quoted; the
  nodeSelector guard in the template now also skips an empty or null selector
  instead of emitting `{}` / `null` under `nodeSelector:`. The `index` blob
  ids below are the originally recorded ones and do not reflect these changes.

 dan/k8s-emc.yml                                    |  1 +
 inventory/group_vars/k8s-emc/vars.yml              |  4 ++
 .../kubernetes/addons/cert-manager/tasks/main.yml  |  2 +-
 .../intel-gpu-device-plugin/defaults/main.yml      |  5 ++
 .../addons/intel-gpu-device-plugin/tasks/main.yml  | 25 ++++++++
 .../templates/config.0.24.0.yml.j2                 | 66 ++++++++++++++++++++++
 6 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml
 create mode 100644 roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml
 create mode 100644 roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2

diff --git a/dan/k8s-emc.yml b/dan/k8s-emc.yml
index 340986e5..ce4b9c9e 100644
--- a/dan/k8s-emc.yml
+++ b/dan/k8s-emc.yml
@@ -39,3 +39,4 @@
   - role: kubernetes/addons/openebs-zfs
   - role: kubernetes/addons/cert-manager
   - role: kubernetes/addons/node-feature-discovery
+  - role: kubernetes/addons/intel-gpu-device-plugin
diff --git a/inventory/group_vars/k8s-emc/vars.yml b/inventory/group_vars/k8s-emc/vars.yml
index 31ccd4d9..650092e6 100644
--- a/inventory/group_vars/k8s-emc/vars.yml
+++ b/inventory/group_vars/k8s-emc/vars.yml
@@ -111,3 +111,7 @@ kubernetes_node_feature_discovery_worker_config: |
       - feature: kernel.loadedmodule
         matchExpressions:
           blackmagic_io: {op: Exists}
+
+kubernetes_intel_gpu_device_plugin_version: "0.24.0"
+kubernetes_intel_gpu_device_plugin_node_selector:
+  "intel.feature.node.kubernetes.io/gpu": "true"
diff --git a/roles/kubernetes/addons/cert-manager/tasks/main.yml b/roles/kubernetes/addons/cert-manager/tasks/main.yml
index e34085e1..bc653e01 100644
--- a/roles/kubernetes/addons/cert-manager/tasks/main.yml
+++ b/roles/kubernetes/addons/cert-manager/tasks/main.yml
@@ -1,5 +1,5 @@
 ---
-- name: deploy metrics-server addon
+- name: deploy cert-manager addon
   run_once: true
   delegate_to: "{{ groups['_kubernetes_primary_controlplane_node_'] | first }}"
   block:
diff --git a/roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml b/roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml
new file mode 100644
index 00000000..c65d79e0
--- /dev/null
+++ b/roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml
@@ -0,0 +1,5 @@
+---
+# kubernetes_intel_gpu_device_plugin_version: "0.24.0"
+
+# kubernetes_intel_gpu_device_plugin_node_selector:
+#   "intel.feature.node.kubernetes.io/gpu": "true"
diff --git a/roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml b/roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml
new file mode 100644
index 00000000..81115d35
--- /dev/null
+++ b/roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml
@@ -0,0 +1,25 @@
+---
+- name: deploy intel-gpu device-plugin
+  run_once: true
+  delegate_to: "{{ groups['_kubernetes_primary_controlplane_node_'] | first }}"
+  block:
+  - name: create base directory for intel-gpu device-plugin
+    file:
+      path: /etc/kubernetes/addons/intel-gpu-device-plugin
+      state: directory
+
+  - name: copy config for intel-gpu-device-plugin
+    template:
+      src: "config.{{ kubernetes_intel_gpu_device_plugin_version }}.yml.j2"
+      dest: /etc/kubernetes/addons/intel-gpu-device-plugin/config.yml
+
+  - name: check if intel-gpu device-plugin is already installed
+    check_mode: false
+    command: kubectl --kubeconfig /etc/kubernetes/admin.conf diff -f /etc/kubernetes/addons/intel-gpu-device-plugin/config.yml
+    failed_when: false
+    changed_when: false
+    register: kube_intel_gpu_device_plugin_diff_result
+
+  - name: install intel-gpu device-plugin onto the cluster
+    when: kube_intel_gpu_device_plugin_diff_result.rc != 0
+    command: kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f /etc/kubernetes/addons/intel-gpu-device-plugin/config.yml
diff --git a/roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2 b/roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2
new file mode 100644
index 00000000..883212f9
--- /dev/null
+++ b/roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2
@@ -0,0 +1,66 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  labels:
+    app: intel-gpu-plugin
+  name: intel-gpu-plugin
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      app: intel-gpu-plugin
+  template:
+    metadata:
+      labels:
+        app: intel-gpu-plugin
+    spec:
+      containers:
+      - env:
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+        image: intel/intel-gpu-plugin:0.24.0
+        imagePullPolicy: IfNotPresent
+        name: intel-gpu-plugin
+        securityContext:
+          allowPrivilegeEscalation: false
+          readOnlyRootFilesystem: true
+        volumeMounts:
+        - mountPath: /dev/dri
+          name: devfs
+          readOnly: true
+        - mountPath: /sys/class/drm
+          name: sysfs
+          readOnly: true
+        - mountPath: /var/lib/kubelet/device-plugins
+          name: kubeletsockets
+      initContainers:
+      - image: intel/intel-gpu-initcontainer:0.24.0
+        imagePullPolicy: IfNotPresent
+        name: intel-gpu-initcontainer
+        securityContext:
+          allowPrivilegeEscalation: false
+          readOnlyRootFilesystem: true
+        volumeMounts:
+        - mountPath: /etc/kubernetes/node-feature-discovery/source.d/
+          name: nfd-source-hooks
+      nodeSelector:
+        kubernetes.io/arch: amd64
+{% if kubernetes_intel_gpu_device_plugin_node_selector | default({}, true) %}
+        {{ kubernetes_intel_gpu_device_plugin_node_selector | to_nice_yaml(indent=2) | indent(width=8) -}}
+{% endif %}
+      volumes:
+      - hostPath:
+          path: /dev/dri
+        name: devfs
+      - hostPath:
+          path: /sys/class/drm
+        name: sysfs
+      - hostPath:
+          path: /var/lib/kubelet/device-plugins
+        name: kubeletsockets
+      - hostPath:
+          path: /etc/kubernetes/node-feature-discovery/source.d/
+          type: DirectoryOrCreate
+        name: nfd-source-hooks
-- 
cgit v1.2.3