Skip to content

Commit dd78f3d

Browse files
committed
Remove nvidia.com/gpu.imex-domain label
This removes the logic for constructing the nvidia.com/gpu.imex-domain label. As a result, the logic to include the IMEX nodes_config.cfg file in the GFD container is no longer required. Signed-off-by: Evan Lezar <[email protected]>
1 parent 243e8f1 commit dd78f3d

File tree

4 files changed

+2
-168
lines changed

4 files changed

+2
-168
lines changed

deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -59,32 +59,6 @@ spec:
5959
shareProcessNamespace: true
6060
{{- end }}
6161
initContainers:
62-
- image: {{ include "nvidia-device-plugin.fullimage" . }}
63-
name: gpu-feature-discovery-imex-init
64-
command: ["/bin/bash", "-c"]
65-
args:
66-
- |
67-
IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg
68-
if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then
69-
echo "Removing cached IMEX nodes config"
70-
rm -f /config/${IMEX_NODES_CONFIG_FILE}
71-
fi
72-
73-
if [[ ! -f /driver-root/${IMEX_NODES_CONFIG_FILE} ]]; then
74-
echo "No IMEX nodes config path detected; Skipping"
75-
exit 0
76-
fi
77-
78-
echo "Copying IMEX nodes config"
79-
mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE})
80-
cp /driver-root/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE}
81-
volumeMounts:
82-
- name: config
83-
mountPath: /config
84-
- name: driver-root
85-
mountPath: /driver-root/etc
86-
subPath: etc
87-
readOnly: true
8862
{{- if $options.hasConfigMap }}
8963
- image: {{ include "nvidia-device-plugin.fullimage" . }}
9064
name: gpu-feature-discovery-init

docs/gpu-feature-discovery/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ For a similar list of labels generated or used by the device plugin, see [here](
221221
| nvidia.com/gpu.replicas | String | Number of GPU replicas available. Will be equal to the number of physical GPUs unless some sharing strategy is employed in which case the GPU count will be multiplied by replicas. | 4 |
222222
| nvidia.com/gpu.mode | String | Mode of the GPU. Can be either "compute" or "display". Details of the GPU modes can be found [here](https://docs.nvidia.com/grid/13.0/grid-gpumodeswitch-user-guide/index.html#compute-and-graphics-mode) | compute |
223223
| nvidia.com/gpu.clique | String | GPUFabric ClusterUUID + CliqueID | 7b968a6d-c8aa-45e1-9e07-e1e51be99c31.1 |
224-
| nvidia.com/gpu.imex-domain | String | IMEX domain Ip list(Hashed) + CliqueID | 79b326e7-d566-3483-c2a3-9b38fa5cb1c8.1 |
225224
226225
Depending on the MIG strategy used, the following set of labels may also be
227226
available (or override the default values for some of the labels listed above):

internal/lm/imex.go

Lines changed: 2 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -17,92 +17,26 @@
1717
package lm
1818

1919
import (
20-
"bufio"
21-
"errors"
2220
"fmt"
23-
"io"
24-
"net"
25-
"os"
26-
"path/filepath"
27-
"sort"
2821
"strings"
2922

30-
"github.com/google/uuid"
3123
"k8s.io/klog/v2"
3224

3325
spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
3426
"github.com/NVIDIA/k8s-device-plugin/internal/resource"
3527
)
3628

37-
const (
38-
// ImexNodesConfigFilePath is the path to the IMEX nodes config file.
39-
// This file contains a list of IP addresses of the nodes in the IMEX domain.
40-
ImexNodesConfigFilePath = "/etc/nvidia-imex/nodes_config.cfg"
41-
)
42-
4329
func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) {
44-
var errs error
45-
for _, root := range imexNodesConfigFilePathSearchRoots(config) {
46-
configFilePath := filepath.Join(root, ImexNodesConfigFilePath)
47-
imexLabeler, err := imexLabelerForConfigFile(configFilePath, devices)
48-
if err != nil {
49-
errs = errors.Join(errs, err)
50-
continue
51-
}
52-
if imexLabeler != nil {
53-
klog.Infof("Using labeler for IMEX config %v", configFilePath)
54-
return imexLabeler, nil
55-
}
56-
}
57-
if errs != nil {
58-
return nil, errs
59-
}
60-
61-
return empty{}, nil
62-
}
63-
64-
// imexNodesConfigFilePathSearchRoots returns a list of roots to search for the IMEX nodes config file.
65-
func imexNodesConfigFilePathSearchRoots(config *spec.Config) []string {
66-
// By default, search / and /config for config files.
67-
roots := []string{"/", "/config"}
68-
69-
if config == nil || config.Flags.Plugin == nil || config.Flags.Plugin.ContainerDriverRoot == nil {
70-
return roots
71-
}
72-
73-
// If a driver root is specified, it is also searched.
74-
return append(roots, *config.Flags.Plugin.ContainerDriverRoot)
75-
}
76-
77-
func imexLabelerForConfigFile(configFilePath string, devices []resource.Device) (Labeler, error) {
78-
imexConfigFile, err := os.Open(configFilePath)
79-
if os.IsNotExist(err) {
80-
// No imex config file, return empty labels
81-
return nil, nil
82-
} else if err != nil {
83-
return nil, fmt.Errorf("failed to open imex config file: %v", err)
84-
}
85-
defer imexConfigFile.Close()
86-
8730
clusterUUID, cliqueID, err := getFabricIDs(devices)
8831
if err != nil {
8932
return nil, err
9033
}
9134
if clusterUUID == "" || cliqueID == "" {
92-
return nil, nil
93-
}
94-
95-
imexDomainID, err := getImexDomainID(imexConfigFile)
96-
if err != nil {
97-
return nil, err
98-
}
99-
if imexDomainID == "" {
100-
return nil, nil
35+
return empty{}, nil
10136
}
10237

10338
labels := Labels{
104-
"nvidia.com/gpu.clique": strings.Join([]string{clusterUUID, cliqueID}, "."),
105-
"nvidia.com/gpu.imex-domain": strings.Join([]string{imexDomainID, cliqueID}, "."),
39+
"nvidia.com/gpu.clique": strings.Join([]string{clusterUUID, cliqueID}, "."),
10640
}
10741

10842
return labels, nil
@@ -147,36 +81,3 @@ func getFabricIDs(devices []resource.Device) (string, string, error) {
14781
}
14882
return "", "", nil
14983
}
150-
151-
// getImexDomainID reads the imex config file and returns a unique identifier
152-
// based on the sorted list of IP addresses in the file.
153-
func getImexDomainID(r io.Reader) (string, error) {
154-
// Read the file line by line
155-
var ips []string
156-
scanner := bufio.NewScanner(r)
157-
for scanner.Scan() {
158-
ip := strings.TrimSpace(scanner.Text())
159-
if net.ParseIP(ip) == nil {
160-
return "", fmt.Errorf("invalid IP address in imex config file: %s", ip)
161-
}
162-
ips = append(ips, ip)
163-
}
164-
165-
if err := scanner.Err(); err != nil {
166-
return "", fmt.Errorf("failed to read imex config file: %v", err)
167-
}
168-
169-
if len(ips) == 0 {
170-
// No IPs in the file, return empty labels
171-
return "", nil
172-
}
173-
174-
sort.Strings(ips)
175-
176-
return generateContentUUID(strings.Join(ips, "\n")), nil
177-
178-
}
179-
180-
func generateContentUUID(seed string) string {
181-
return uuid.NewSHA1(uuid.Nil, []byte(seed)).String()
182-
}

internal/lm/imex_test.go

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15,43 +15,3 @@
1515
**/
1616

1717
package lm
18-
19-
import (
20-
"strings"
21-
"testing"
22-
23-
"github.com/stretchr/testify/require"
24-
)
25-
26-
func TestGerenerateDomainUUID(t *testing.T) {
27-
testCases := []struct {
28-
description string
29-
ips []string
30-
expected string
31-
}{
32-
{
33-
description: "single IP",
34-
ips: []string{"10.130.3.24"},
35-
expected: "60ad7226-0130-54d0-b762-2a5385a3a26f",
36-
},
37-
{
38-
description: "multiple IPs",
39-
ips: []string{
40-
"10.130.3.24",
41-
"10.130.3.53",
42-
"10.130.3.23",
43-
"10.130.3.31",
44-
"10.130.3.27",
45-
"10.130.3.25",
46-
},
47-
expected: "8a7363e9-1003-5814-9354-175fdff19204",
48-
},
49-
}
50-
51-
for _, tc := range testCases {
52-
t.Run(tc.description, func(t *testing.T) {
53-
id := generateContentUUID(strings.Join(tc.ips, "\n"))
54-
require.Equal(t, tc.expected, id)
55-
})
56-
}
57-
}

0 commit comments

Comments
 (0)