
Commit bc57200 (2 parents: 5a9f3c3 + c4be4be)

Merge branch 'upstream-bump-version' into 'master'

Bump version to 1.0.0-beta5

See merge request nvidia/kubernetes/device-plugin!23

File tree

5 files changed: +80 −8 lines


README.md

Lines changed: 17 additions & 6 deletions
@@ -70,7 +70,7 @@ Once you have enabled this option on *all* the GPU nodes you wish to use,
 you can then enable GPU support in your cluster by deploying the following Daemonset:
 
 ```shell
-$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta4/nvidia-device-plugin.yml
+$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta5/nvidia-device-plugin.yml
 ```
 
 ### Running GPU Jobs
@@ -115,24 +115,24 @@ The next sections are focused on building the device plugin and running it.
 #### Build
 Option 1, pull the prebuilt image from [Docker Hub](https://hub.docker.com/r/nvidia/k8s-device-plugin):
 ```shell
-$ docker pull nvidia/k8s-device-plugin:1.0.0-beta4
+$ docker pull nvidia/k8s-device-plugin:1.0.0-beta5
 ```
 
 Option 2, build without cloning the repository:
 ```shell
-$ docker build -t nvidia/k8s-device-plugin:1.0.0-beta4 https://github.com/NVIDIA/k8s-device-plugin.git#1.0.0-beta4
+$ docker build -t nvidia/k8s-device-plugin:1.0.0-beta5 https://github.com/NVIDIA/k8s-device-plugin.git#1.0.0-beta5
 ```
 
 Option 3, if you want to modify the code:
 ```shell
 $ git clone https://github.com/NVIDIA/k8s-device-plugin.git && cd k8s-device-plugin
-$ git checkout 1.0.0-beta4
-$ docker build -t nvidia/k8s-device-plugin:1.0.0-beta4 .
+$ git checkout 1.0.0-beta5
+$ docker build -t nvidia/k8s-device-plugin:1.0.0-beta5 .
 ```
 
 #### Run locally
 ```shell
-$ docker run --security-opt=no-new-privileges --cap-drop=ALL --network=none -it -v /var/lib/kubelet/device-plugins:/var/lib/kubelet/device-plugins nvidia/k8s-device-plugin:1.0.0-beta4
+$ docker run --security-opt=no-new-privileges --cap-drop=ALL --network=none -it -v /var/lib/kubelet/device-plugins:/var/lib/kubelet/device-plugins nvidia/k8s-device-plugin:1.0.0-beta5
 ```
 
 #### Deploy as Daemon Set:
@@ -154,6 +154,17 @@ $ ./k8s-device-plugin
 
 ## Changelog
 
+### Version 1.0.0-beta5
+
+- Add a new plugin.yml variant that is compatible with the CPUManager
+- Change CMD in Dockerfile to ENTRYPOINT
+- Add flag to optionally return list of device nodes in Allocate() call
+- Refactor device plugin to eventually handle multiple resource types
+- Move plugin error retry to event loop so we can exit with a signal
+- Update all vendored dependencies to their latest versions
+- Fix bug that was inadvertently *always* disabling health checks
+- Update minimal driver version to 384.81
+
 ### Version 1.0.0-beta4
 
 - Fixes a bug with a nil pointer dereference around `getDevices:CPUAffinity`
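The changelog entry about returning device nodes in the Allocate() call corresponds to a new `--pass-device-specs` flag, which appears as `args: ["--pass-device-specs"]` in the manifest added by this commit. Below is a minimal local-run sketch, assuming the image now forwards extra arguments to the plugin binary (the same commit switches the Dockerfile from CMD to ENTRYPOINT); the in-cluster variant runs privileged, so the hardened options from the README's local-run command may need to be relaxed.

```shell
# Sketch only: run the 1.0.0-beta5 image with the flag introduced in this
# release. The flag name is taken from the CPUManager-compatible manifest
# added in this commit; security options are omitted here and may need
# adjusting for your environment.
$ docker run -it \
    -v /var/lib/kubelet/device-plugins:/var/lib/kubelet/device-plugins \
    nvidia/k8s-device-plugin:1.0.0-beta5 --pass-device-specs
```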

RELEASE.md

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ Publishing the container is automated through gitlab-ci and only requires on to
 - [ ] Update the README changelog
 
 - [ ] Update the device plugin (1.16+) to use the new container version (nvidia-device-plugin.yml)
+- [ ] Update the device plugin compatible with the CPUManager (1.16+) to use the new container version (nvidia-device-plugin-compat-with-cpumanager.yml)
 - [ ] Update the legacy device plugin (pre 1.16) to use the new container version (extensions-v1beta1-nvidia-device-plugin.yml)
 - [ ] Commit, Tag and Push to Gitlab
 - [ ] Trigger the [multi arch manifest CI](https://gitlab.com/nvidia/container-images/dockerhub-manifests)

extensions-v1beta1-nvidia-device-plugin.yml

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ spec:
       # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
       priorityClassName: "system-node-critical"
       containers:
-      - image: nvidia/k8s-device-plugin:1.0.0-beta4
+      - image: nvidia/k8s-device-plugin:1.0.0-beta5
         name: nvidia-device-plugin-ctr
         securityContext:
           allowPrivilegeEscalation: false
nvidia-device-plugin-compat-with-cpumanager.yml

Lines changed: 60 additions & 0 deletions

@@ -0,0 +1,60 @@
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nvidia-device-plugin-daemonset
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      name: nvidia-device-plugin-ds
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      # This annotation is deprecated. Kept here for backward compatibility
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ""
+      labels:
+        name: nvidia-device-plugin-ds
+    spec:
+      tolerations:
+      # This toleration is deprecated. Kept here for backward compatibility
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+      - key: CriticalAddonsOnly
+        operator: Exists
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+      # Mark this pod as a critical add-on; when enabled, the critical add-on
+      # scheduler reserves resources for critical add-on pods so that they can
+      # be rescheduled after a failure.
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+      priorityClassName: "system-node-critical"
+      containers:
+      - image: nvidia/k8s-device-plugin:1.0.0-beta5
+        name: nvidia-device-plugin-ctr
+        args: ["--pass-device-specs"]
+        securityContext:
+          privileged: true
+        volumeMounts:
+          - name: device-plugin
+            mountPath: /var/lib/kubelet/device-plugins
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
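For completeness, a hedged sketch of deploying this new variant, following the same `kubectl create -f` pattern the README uses for `nvidia-device-plugin.yml`; the file name comes from the RELEASE.md checklist above, and the exact raw URL layout is an assumption.

```shell
# Sketch only: deploy the CPUManager-compatible DaemonSet the same way the
# README deploys the default manifest. The raw URL is assumed, not confirmed
# by this commit.
$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta5/nvidia-device-plugin-compat-with-cpumanager.yml
```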

nvidia-device-plugin.yml

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ spec:
       # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
       priorityClassName: "system-node-critical"
      containers:
-      - image: nvidia/k8s-device-plugin:1.0.0-beta4
+      - image: nvidia/k8s-device-plugin:1.0.0-beta5
         name: nvidia-device-plugin-ctr
         securityContext:
           allowPrivilegeEscalation: false
