summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christian Pointner <equinox@spreadspace.org> 2022-08-30 00:04:35 +0200
committer: Christian Pointner <equinox@spreadspace.org> 2022-08-30 00:04:35 +0200
commit: 6f6e8f2d3ec5291547da26c0441cf2728b3c8879 (patch)
tree: 78703d0c8ab69206c108d2fb148c230226e6f9ec
parent: kubernetes/nfd: some more tests and improvements (diff)
kubernetes: add new intel-gpu device-plugin addon
-rw-r--r-- dan/k8s-emc.yml 1
-rw-r--r-- inventory/group_vars/k8s-emc/vars.yml 4
-rw-r--r-- roles/kubernetes/addons/cert-manager/tasks/main.yml 2
-rw-r--r-- roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml 5
-rw-r--r-- roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml 25
-rw-r--r-- roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2 66
6 files changed, 102 insertions, 1 deletions
diff --git a/dan/k8s-emc.yml b/dan/k8s-emc.yml
index 340986e5..ce4b9c9e 100644
--- a/dan/k8s-emc.yml
+++ b/dan/k8s-emc.yml
@@ -39,3 +39,4 @@
- role: kubernetes/addons/openebs-zfs
- role: kubernetes/addons/cert-manager
- role: kubernetes/addons/node-feature-discovery
+ - role: kubernetes/addons/intel-gpu-device-plugin
diff --git a/inventory/group_vars/k8s-emc/vars.yml b/inventory/group_vars/k8s-emc/vars.yml
index 31ccd4d9..650092e6 100644
--- a/inventory/group_vars/k8s-emc/vars.yml
+++ b/inventory/group_vars/k8s-emc/vars.yml
@@ -111,3 +111,7 @@ kubernetes_node_feature_discovery_worker_config: |
- feature: kernel.loadedmodule
matchExpressions:
blackmagic_io: {op: Exists}
+
+kubernetes_intel_gpu_device_plugin_version: 0.24.0
+kubernetes_intel_gpu_device_plugin_node_selector:
+ "intel.feature.node.kubernetes.io/gpu": "true"
diff --git a/roles/kubernetes/addons/cert-manager/tasks/main.yml b/roles/kubernetes/addons/cert-manager/tasks/main.yml
index e34085e1..bc653e01 100644
--- a/roles/kubernetes/addons/cert-manager/tasks/main.yml
+++ b/roles/kubernetes/addons/cert-manager/tasks/main.yml
@@ -1,5 +1,5 @@
---
-- name: deploy metrics-server addon
+- name: deploy cert-manager addon
run_once: true
delegate_to: "{{ groups['_kubernetes_primary_controlplane_node_'] | first }}"
block:
diff --git a/roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml b/roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml
new file mode 100644
index 00000000..c65d79e0
--- /dev/null
+++ b/roles/kubernetes/addons/intel-gpu-device-plugin/defaults/main.yml
@@ -0,0 +1,5 @@
+---
+# kubernetes_intel_gpu_device_plugin_version: 0.24.0
+
+# kubernetes_intel_gpu_device_plugin_node_selector:
+# "intel.feature.node.kubernetes.io/gpu": "true"
diff --git a/roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml b/roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml
new file mode 100644
index 00000000..81115d35
--- /dev/null
+++ b/roles/kubernetes/addons/intel-gpu-device-plugin/tasks/main.yml
@@ -0,0 +1,25 @@
+---
+- name: deploy intel-gpu device-plugin
+  run_once: true  # executed a single time per play, on the delegated host below
+  delegate_to: "{{ groups['_kubernetes_primary_controlplane_node_'] | first }}"
+  block:
+    - name: create base directory for intel-gpu device-plugin
+      file:
+        path: /etc/kubernetes/addons/intel-gpu-device-plugin
+        state: directory
+
+    - name: copy config for intel-gpu-device-plugin
+      template:
+        src: "config.{{ kubernetes_intel_gpu_device_plugin_version }}.yml.j2"  # template is picked by version
+        dest: /etc/kubernetes/addons/intel-gpu-device-plugin/config.yml
+
+    - name: check if intel-gpu device-plugin is already installed
+      check_mode: false  # run the diff even under --check so the result is registered
+      command: kubectl --kubeconfig /etc/kubernetes/admin.conf diff -f /etc/kubernetes/addons/intel-gpu-device-plugin/config.yml
+      failed_when: false  # a non-zero rc is the signal for the next task, not a failure
+      changed_when: false
+      register: kube_intel_gpu_device_plugin_diff_result
+
+    - name: install intel-gpu device-plugin onto the cluster
+      when: kube_intel_gpu_device_plugin_diff_result.rc != 0  # kubectl diff exits 0 only if nothing differs
+      command: kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f /etc/kubernetes/addons/intel-gpu-device-plugin/config.yml
diff --git a/roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2 b/roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2
new file mode 100644
index 00000000..883212f9
--- /dev/null
+++ b/roles/kubernetes/addons/intel-gpu-device-plugin/templates/config.0.24.0.yml.j2
@@ -0,0 +1,66 @@
+apiVersion: apps/v1  # DaemonSet for the intel-gpu-plugin 0.24.0 images, deployed to kube-system
+kind: DaemonSet
+metadata:
+  labels:
+    app: intel-gpu-plugin
+  name: intel-gpu-plugin
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      app: intel-gpu-plugin
+  template:
+    metadata:
+      labels:
+        app: intel-gpu-plugin
+    spec:
+      containers:
+      - env:
+        - name: NODE_NAME  # filled from the pod's spec.nodeName via the downward API
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+        image: intel/intel-gpu-plugin:0.24.0
+        imagePullPolicy: IfNotPresent
+        name: intel-gpu-plugin
+        securityContext:
+          allowPrivilegeEscalation: false
+          readOnlyRootFilesystem: true
+        volumeMounts:
+        - mountPath: /dev/dri
+          name: devfs
+          readOnly: true
+        - mountPath: /sys/class/drm
+          name: sysfs
+          readOnly: true
+        - mountPath: /var/lib/kubelet/device-plugins  # the only mount not marked readOnly
+          name: kubeletsockets
+      initContainers:
+      - image: intel/intel-gpu-initcontainer:0.24.0
+        imagePullPolicy: IfNotPresent
+        name: intel-gpu-initcontainer
+        securityContext:
+          allowPrivilegeEscalation: false
+          readOnlyRootFilesystem: true
+        volumeMounts:
+        - mountPath: /etc/kubernetes/node-feature-discovery/source.d/  # presumably receives an NFD hook; confirm upstream
+          name: nfd-source-hooks
+      nodeSelector:  # arch pin is always present; extra selectors only if the variable is a non-empty mapping
+        kubernetes.io/arch: amd64
+{% if kubernetes_intel_gpu_device_plugin_node_selector | default({}) %}
+        {{ kubernetes_intel_gpu_device_plugin_node_selector | to_nice_yaml(indent=2) | indent(width=8) -}}
+{% endif %}
+      volumes:
+      - hostPath:
+          path: /dev/dri
+        name: devfs
+      - hostPath:
+          path: /sys/class/drm
+        name: sysfs
+      - hostPath:
+          path: /var/lib/kubelet/device-plugins
+        name: kubeletsockets
+      - hostPath:
+          path: /etc/kubernetes/node-feature-discovery/source.d/
+          type: DirectoryOrCreate  # host directory is created if it does not exist yet
+        name: nfd-source-hooks