-
Notifications
You must be signed in to change notification settings - Fork 731
Remove nvidia.com/gpu.imex-domain label #1152
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,92 +17,26 @@ | |
package lm | ||
|
||
import ( | ||
"bufio" | ||
"errors" | ||
"fmt" | ||
"io" | ||
"net" | ||
"os" | ||
"path/filepath" | ||
"sort" | ||
"strings" | ||
|
||
"github.com/google/uuid" | ||
"k8s.io/klog/v2" | ||
|
||
spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1" | ||
"github.com/NVIDIA/k8s-device-plugin/internal/resource" | ||
) | ||
|
||
const ( | ||
// ImexNodesConfigFilePath is the path to the IMEX nodes config file. | ||
// This file contains a list of IP addresses of the nodes in the IMEX domain. | ||
ImexNodesConfigFilePath = "/etc/nvidia-imex/nodes_config.cfg" | ||
) | ||
|
||
func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) { | ||
var errs error | ||
for _, root := range imexNodesConfigFilePathSearchRoots(config) { | ||
configFilePath := filepath.Join(root, ImexNodesConfigFilePath) | ||
imexLabeler, err := imexLabelerForConfigFile(configFilePath, devices) | ||
if err != nil { | ||
errs = errors.Join(errs, err) | ||
continue | ||
} | ||
if imexLabeler != nil { | ||
klog.Infof("Using labeler for IMEX config %v", configFilePath) | ||
return imexLabeler, nil | ||
} | ||
} | ||
if errs != nil { | ||
return nil, errs | ||
} | ||
|
||
return empty{}, nil | ||
} | ||
|
||
// imexNodesConfigFilePathSearchRoots returns a list of roots to search for the IMEX nodes config file. | ||
func imexNodesConfigFilePathSearchRoots(config *spec.Config) []string { | ||
// By default, search / and /config for config files. | ||
roots := []string{"/", "/config"} | ||
|
||
if config == nil || config.Flags.Plugin == nil || config.Flags.Plugin.ContainerDriverRoot == nil { | ||
return roots | ||
} | ||
|
||
// If a driver root is specified, it is also searched. | ||
return append(roots, *config.Flags.Plugin.ContainerDriverRoot) | ||
} | ||
|
||
func imexLabelerForConfigFile(configFilePath string, devices []resource.Device) (Labeler, error) { | ||
imexConfigFile, err := os.Open(configFilePath) | ||
if os.IsNotExist(err) { | ||
// No imex config file, return empty labels | ||
return nil, nil | ||
} else if err != nil { | ||
return nil, fmt.Errorf("failed to open imex config file: %v", err) | ||
} | ||
defer imexConfigFile.Close() | ||
|
||
clusterUUID, cliqueID, err := getFabricIDs(devices) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if clusterUUID == "" || cliqueID == "" { | ||
return nil, nil | ||
} | ||
|
||
imexDomainID, err := getImexDomainID(imexConfigFile) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if imexDomainID == "" { | ||
return nil, nil | ||
return empty{}, nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In order to avoid explicit There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Thanks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could we do the same for line (89/32) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No. On that line we're returning an actual error and we want to return |
||
} | ||
|
||
labels := Labels{ | ||
"nvidia.com/gpu.clique": strings.Join([]string{clusterUUID, cliqueID}, "."), | ||
"nvidia.com/gpu.imex-domain": strings.Join([]string{imexDomainID, cliqueID}, "."), | ||
"nvidia.com/gpu.clique": strings.Join([]string{clusterUUID, cliqueID}, "."), | ||
} | ||
|
||
return labels, nil | ||
|
@@ -147,36 +81,3 @@ func getFabricIDs(devices []resource.Device) (string, string, error) { | |
} | ||
return "", "", nil | ||
} | ||
|
||
// getImexDomainID reads the imex config file and returns a unique identifier | ||
// based on the sorted list of IP addresses in the file. | ||
func getImexDomainID(r io.Reader) (string, error) { | ||
// Read the file line by line | ||
var ips []string | ||
scanner := bufio.NewScanner(r) | ||
for scanner.Scan() { | ||
ip := strings.TrimSpace(scanner.Text()) | ||
if net.ParseIP(ip) == nil { | ||
return "", fmt.Errorf("invalid IP address in imex config file: %s", ip) | ||
} | ||
ips = append(ips, ip) | ||
} | ||
|
||
if err := scanner.Err(); err != nil { | ||
return "", fmt.Errorf("failed to read imex config file: %v", err) | ||
} | ||
|
||
if len(ips) == 0 { | ||
// No IPs in the file, return empty labels | ||
return "", nil | ||
} | ||
|
||
sort.Strings(ips) | ||
|
||
return generateContentUUID(strings.Join(ips, "\n")), nil | ||
|
||
} | ||
|
||
func generateContentUUID(seed string) string { | ||
return uuid.NewSHA1(uuid.Nil, []byte(seed)).String() | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. Here we are returning an actual error. Do you have a motivation as to why we want to ignore it?