From d0ad0cb4993cc99893f2d6590ebd1c83e791f1f4 Mon Sep 17 00:00:00 2001 From: "wangjianyu.wjy" Date: Wed, 4 Dec 2024 15:40:23 +0800 Subject: [PATCH] koordlet: supply rdma device Signed-off-by: wangjianyu.wjy --- docker/koordlet.dockerfile | 2 +- go.mod | 8 ++ go.sum | 15 +++ pkg/features/koordlet_features.go | 7 ++ .../metricsadvisor/devices/helper/sriov.go | 100 ++++++++++++++++ .../metricsadvisor/devices/helper/topology.go | 87 ++++++++++++++ .../devices/helper/topology_test.go | 87 ++++++++++++++ .../devices/rdma/collector_rdma.go | 77 +++++++++++++ .../metricsadvisor/devices/rdma/net.go | 108 ++++++++++++++++++ .../metricsadvisor/plugins_profile.go | 4 +- .../impl/states_device_linux.go | 77 +++++++++++-- .../impl/states_device_linux_test.go | 13 ++- pkg/koordlet/util/device.go | 31 ++++- 13 files changed, 604 insertions(+), 12 deletions(-) create mode 100644 pkg/koordlet/metricsadvisor/devices/helper/sriov.go create mode 100644 pkg/koordlet/metricsadvisor/devices/helper/topology.go create mode 100644 pkg/koordlet/metricsadvisor/devices/helper/topology_test.go create mode 100644 pkg/koordlet/metricsadvisor/devices/rdma/collector_rdma.go create mode 100644 pkg/koordlet/metricsadvisor/devices/rdma/net.go diff --git a/docker/koordlet.dockerfile b/docker/koordlet.dockerfile index 1a89bfa9d..3afac2cdb 100644 --- a/docker/koordlet.dockerfile +++ b/docker/koordlet.dockerfile @@ -35,7 +35,7 @@ RUN go build -a -o koordlet cmd/koordlet/main.go FROM --platform=$TARGETPLATFORM nvidia/cuda:11.8.0-base-ubuntu22.04 WORKDIR / -RUN apt-get update && apt-get install -y lvm2 iptables && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y lvm2 iptables pciutils && rm -rf /var/lib/apt/lists/* COPY --from=builder /go/src/github.com/koordinator-sh/koordinator/koordlet . 
COPY --from=builder /usr/local/lib /usr/lib ENTRYPOINT ["/koordlet"] diff --git a/go.mod b/go.mod index c2714b35e..e0caf0dc4 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/koordinator-sh/koordinator go 1.20 require ( + github.com/Mellanox/rdmamap v1.1.0 github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82 github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 github.com/containerd/nri v0.6.1 @@ -19,6 +20,7 @@ require ( github.com/golang/protobuf v1.5.3 github.com/google/go-cmp v0.5.9 github.com/google/uuid v1.3.0 + github.com/jaypipes/ghw v0.12.0 github.com/jedib0t/go-pretty/v6 v6.4.0 github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.1 github.com/mohae/deepcopy v0.0.0-20170603005431-491d3605edfb @@ -65,23 +67,29 @@ require ( require ( cloud.google.com/go/compute/metadata v0.2.3 // indirect + github.com/StackExchange/wmi v1.2.1 // indirect github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/containerd/containerd v1.6.9 // indirect github.com/containerd/ttrpc v1.2.3 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/evanphx/json-patch/v5 v5.6.0 // indirect + github.com/ghodss/yaml v1.0.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/cel-go v0.16.1 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/pprof v0.0.0-20220829040838-70bd9ae97f40 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3 // indirect + github.com/jaypipes/pcidb v1.0.0 // indirect + github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/stoewer/go-strcase v1.2.0 // indirect golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 // indirect google.golang.org/genproto/googleapis/api 
v0.0.0-20230525234035-dd9d682886f9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230731190214-cbb8c96f2d6d // indirect + howett.net/plist v1.0.0 // indirect k8s.io/controller-manager v0.28.7 // indirect k8s.io/dynamic-resource-allocation v0.28.7 // indirect k8s.io/gengo v0.0.0-20220902162205-c0856e24416d // indirect diff --git a/go.sum b/go.sum index 7cd2878e1..b2ee01b79 100644 --- a/go.sum +++ b/go.sum @@ -233,6 +233,8 @@ github.com/JeffAshton/win_pdh v0.0.0-20161109143554-76bb4ee9f0ab h1:UKkYhof1njT1 github.com/JeffAshton/win_pdh v0.0.0-20161109143554-76bb4ee9f0ab/go.mod h1:3VYc5hodBMJ5+l/7J4xAyMeuM2PNuepvHlGs8yilUCA= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= +github.com/Mellanox/rdmamap v1.1.0 h1:A/W1wAXw+6vm58f3VklrIylgV+eDJlPVIMaIKuxgUT4= +github.com/Mellanox/rdmamap v1.1.0/go.mod h1:fN+/V9lf10ABnDCwTaXRjeeWijLt2iVLETnK+sx/LY8= github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA= github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= @@ -264,6 +266,8 @@ github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cq github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= +github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA= +github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8= github.com/a8m/tree v0.0.0-20210115125333-10a5fd5b637d/go.mod 
h1:FSdwKX97koS5efgm8WevNf7XS3PqtyFkKDDXrz778cg= github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= github.com/ajstarks/deck/generate v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk= @@ -586,6 +590,7 @@ github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbS github.com/fullsailor/pkcs7 v0.0.0-20190404230743-d7302db945fa/go.mod h1:KnogPXtdwXqoenmZCw6S+25EAm2MkxbG0deNDu4cbSA= github.com/fvbommel/sortorder v1.1.0/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0= github.com/garyburd/redigo v0.0.0-20150301180006-535138d7bcd7/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= +github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= @@ -621,6 +626,9 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre github.com/go-logr/zapr v1.2.3/go.mod h1:eIauM6P8qSvTw5o2ez6UEAfGjQKrxQTl5EoK+Qa2oG4= github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= +github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= github.com/go-openapi/jsonreference v0.20.1/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= @@ -852,6 +860,10 @@ 
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLf github.com/ionos-cloud/sdk-go/v6 v6.1.3 h1:vb6yqdpiqaytvreM0bsn2pXw+1YDvEk2RKSmBAQvgDQ= github.com/ishidawataru/sctp v0.0.0-20190723014705-7c296d48a2b5/go.mod h1:DM4VvS+hD/kDi1U1QsX2fnZowwBhqD0Dk3bRPKF/Oc8= github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA= +github.com/jaypipes/ghw v0.12.0 h1:xU2/MDJfWmBhJnujHY9qwXQLs3DBsf0/Xa9vECY0Tho= +github.com/jaypipes/ghw v0.12.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g= +github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8= +github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk= github.com/jedib0t/go-pretty/v6 v6.4.0 h1:YlI/2zYDrweA4MThiYMKtGRfT+2qZOO65ulej8GTcVI= github.com/jedib0t/go-pretty/v6 v6.4.0/go.mod h1:MgmISkTWDSFu0xOqiZ0mKNntMQ2mDgOcwOkwBEkMDJI= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= @@ -1859,6 +1871,7 @@ gopkg.in/square/go-jose.v2 v2.6.0/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= +gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ -1883,6 +1896,8 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod 
h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= +howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM= +howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g= k8s.io/api v0.28.7 h1:YKIhBxjXKaxuxWJnwohV0aGjRA5l4IU0Eywf/q19AVI= k8s.io/api v0.28.7/go.mod h1:y4RbcjCCMff1930SG/TcP3AUKNfaJUgIeUp58e/2vyY= k8s.io/apiextensions-apiserver v0.28.7 h1:NQlzP/vmvIO9Qt7wQTdMe9sGWGkozQZMPk9suehAvR8= diff --git a/pkg/features/koordlet_features.go b/pkg/features/koordlet_features.go index cb76621ac..077cba45e 100644 --- a/pkg/features/koordlet_features.go +++ b/pkg/features/koordlet_features.go @@ -104,6 +104,12 @@ const ( // Accelerators enables GPU related feature in koordlet. Only Nvidia GPUs supported. Accelerators featuregate.Feature = "Accelerators" + // owner: @ZiMengSheng + // alpha: v0.6 + // + // RDMADevices enables RDMA-related features in koordlet. + RDMADevices featuregate.Feature = "RDMADevices" + // owner: @songtao98 @zwzhang0107 // alpha: v1.0 // @@ -164,6 +170,7 @@ var ( CgroupReconcile: {Default: false, PreRelease: featuregate.Alpha}, NodeTopologyReport: {Default: true, PreRelease: featuregate.Beta}, Accelerators: {Default: false, PreRelease: featuregate.Alpha}, + RDMADevices: {Default: false, PreRelease: featuregate.Alpha}, CPICollector: {Default: false, PreRelease: featuregate.Alpha}, Libpfm4: {Default: false, PreRelease: featuregate.Alpha}, PSICollector: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/pkg/koordlet/metricsadvisor/devices/helper/sriov.go b/pkg/koordlet/metricsadvisor/devices/helper/sriov.go new file mode 100644 index 000000000..c32692d7f --- /dev/null +++ b/pkg/koordlet/metricsadvisor/devices/helper/sriov.go @@ -0,0 +1,100 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package helper + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "sort" + "strconv" + "strings" +) + +const ( + configuredVfFile = "sriov_numvfs" +) + +// SriovConfigured reports whether sriov_numvfs for the given PCI address reads a value greater than 0. +func SriovConfigured(addr string) bool { + return GetVConfigured(addr) > 0 +} + +func extractNumber(pfDir string, s string) int { + num, _ := strconv.Atoi(strings.TrimPrefix(s, fmt.Sprintf("%s/virtfn", pfDir))) + return num +} + +// GetVFList returns a list containing the PCI addresses of all VFs discovered under the given PF. +func GetVFList(pf string) (vfList []string, err error) { + vfList = make([]string, 0) + pfDir := filepath.Join(SysBusPci, pf) + _, err = os.Lstat(pfDir) + if err != nil { + err = fmt.Errorf("error. 
Could not get PF directory information for device: %s, Err: %v", pf, err) + return + } + + vfDirs, err := filepath.Glob(filepath.Join(pfDir, "virtfn*")) + + if err != nil { + err = fmt.Errorf("error reading VF directories %v", err) + return + } + // Sort the VF directories by their numeric virtfn suffix. + sort.Slice(vfDirs, func(i, j int) bool { + return extractNumber(pfDir, vfDirs[i]) < extractNumber(pfDir, vfDirs[j]) + }) + + // Read every VF directory and add its VF PCI address to vfList. + for _, dir := range vfDirs { + dirInfo, err := os.Lstat(dir) + if err == nil && (dirInfo.Mode()&os.ModeSymlink != 0) { + linkName, err := filepath.EvalSymlinks(dir) + if err == nil { + vfLink := filepath.Base(linkName) + vfList = append(vfList, vfLink) + } + } + } + return +} + +// GetVConfigured returns the number of VFs configured for a PF. +func GetVConfigured(pf string) int { + configuredVfPath := filepath.Join(SysBusPci, pf, configuredVfFile) + vfs, err := os.ReadFile(configuredVfPath) + if err != nil { + return 0 + } + configuredVFs := bytes.TrimSpace(vfs) + numConfiguredVFs, err := strconv.Atoi(string(configuredVFs)) + if err != nil { + return 0 + } + return numConfiguredVFs +} + +// IsSriovVF checks whether a PCI device has a physfn link to a PF. +func IsSriovVF(pciAddr string) bool { + totalVfFilePath := filepath.Join(SysBusPci, pciAddr, "physfn") + if _, err := os.Stat(totalVfFilePath); err != nil { + return false + } + return true +} diff --git a/pkg/koordlet/metricsadvisor/devices/helper/topology.go b/pkg/koordlet/metricsadvisor/devices/helper/topology.go new file mode 100644 index 000000000..297d46c38 --- /dev/null +++ b/pkg/koordlet/metricsadvisor/devices/helper/topology.go @@ -0,0 +1,87 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package helper + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "regexp" + "strconv" + + "k8s.io/klog/v2" + + "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" +) + +var ( + SysBusPci = "/sys/bus/pci/devices" + DevDir = "/dev" + + pcieRegexp = regexp.MustCompile(`pci\d{4}:[0-9a-fA-F]{2}`) +) + +func ParsePCIInfo(busID string) (int32, string, string, error) { + //klog.V(4).Infof("ParsePCIInfo: busID=%s", busID) + nodeID, err := getNUMANodeID(busID) + if err != nil { + //klog.V(4).Infof("ParsePCIInfo: return err:%v busid:", err, busID) + return 0, "", "", fmt.Errorf("failed to parse NUMA Node ID, err: %w", err) + } + //klog.V(4).Infof("ParsePCIInfo: go on") + pcie, err := getPCIERootComplexID(busID) + if err != nil { + //klog.V(4).Infof("failed to parse PCIE ID, err: %v", err) + return 0, "", "", fmt.Errorf("failed to parse PCIE ID, err: %w", err) + } + //klog.V(4).Infof("ParsePCIInfo: nodeID=%s pcie=%s busID=%s", nodeID, pcie, busID) + return nodeID, pcie, busID, nil +} + +func getPCIERootComplexID(bdf string) (string, error) { + path, err := filepath.EvalSymlinks(filepath.Join(system.GetPCIDeviceDir(), bdf)) + if err != nil { + return "", err + } + return parsePCIEID(path), err +} + +func parsePCIEID(path string) string { + result := pcieRegexp.FindAllStringSubmatch(path, -1) + if len(result) == 0 || len(result[0]) == 0 { + return "" + } + return result[0][0] +} + +func getNUMANodeID(bdf string) (int32, error) { + path := filepath.Join(system.GetPCIDeviceDir(), bdf, "numa_node") + klog.V(4).Infof("ParsePCIInfo: path=%s", path) + data, err := 
os.ReadFile(filepath.Join(system.GetPCIDeviceDir(), bdf, "numa_node")) + if err != nil { + return -1, err + } + nodeID, err := strconv.Atoi(string(bytes.TrimSpace(data))) + if err != nil { + return 0, err + } + if nodeID == -1 { + nodeID = 0 + } + return int32(nodeID), nil +} diff --git a/pkg/koordlet/metricsadvisor/devices/helper/topology_test.go b/pkg/koordlet/metricsadvisor/devices/helper/topology_test.go new file mode 100644 index 000000000..4cfa2626c --- /dev/null +++ b/pkg/koordlet/metricsadvisor/devices/helper/topology_test.go @@ -0,0 +1,87 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package helper + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" +) + +func Test_parseGPUPCIInfo(t *testing.T) { + tests := []struct { + name string + busID string + nodeID int32 + pcie string + wantNode int32 + wantPCIE string + wantBusID string + wantErr bool + }{ + { + name: "numa node -1", + busID: "0000:00:07.0", + nodeID: -1, + pcie: "pci0000:00", + wantNode: 0, + wantPCIE: "pci0000:00", + wantBusID: "0000:00:07.0", + }, + { + name: "numa node 1", + busID: "0000:00:07.0", + nodeID: 1, + pcie: "pci0000:00", + wantNode: 1, + wantPCIE: "pci0000:00", + wantBusID: "0000:00:07.0", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := system.NewFileTestUtil(t) + defer helper.Cleanup() + + pciDeviceDir := system.GetPCIDeviceDir() + gpuDeviceDir := filepath.Join(pciDeviceDir, tt.pcie, tt.busID) + assert.NoError(t, os.MkdirAll(gpuDeviceDir, 0700)) + assert.NoError(t, os.WriteFile(filepath.Join(gpuDeviceDir, "numa_node"), []byte(fmt.Sprintf("%d\n", tt.nodeID)), 0700)) + + symbolicLink := filepath.Join(pciDeviceDir, tt.busID) + assert.NoError(t, os.Symlink(gpuDeviceDir, symbolicLink)) + + var busIdLegacy [16]int8 + for i, v := range tt.busID { + busIdLegacy[i] = int8(v) + } + nodeID, pcie, busID, err := ParsePCIInfo(tt.busID) + if (err != nil) && !tt.wantErr { + t.Errorf("expect wantErr=%v but got err=%v", tt.wantErr, err) + return + } + assert.Equal(t, tt.wantNode, nodeID) + assert.Equal(t, tt.wantPCIE, pcie) + assert.Equal(t, tt.wantBusID, busID) + }) + } +} diff --git a/pkg/koordlet/metricsadvisor/devices/rdma/collector_rdma.go b/pkg/koordlet/metricsadvisor/devices/rdma/collector_rdma.go new file mode 100644 index 000000000..a32578835 --- /dev/null +++ b/pkg/koordlet/metricsadvisor/devices/rdma/collector_rdma.go @@ -0,0 +1,77 @@ +/* +Copyright 2022 The Koordinator Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rdma + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + + "github.com/koordinator-sh/koordinator/pkg/features" + "github.com/koordinator-sh/koordinator/pkg/koordlet/metriccache" + "github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/framework" +) + +const ( + DeviceCollectorName = "RDMA" +) + +type rdmaCollector struct { + enabled bool +} + +func New(opt *framework.Options) framework.DeviceCollector { + return &rdmaCollector{ + enabled: features.DefaultKoordletFeatureGate.Enabled(features.RDMADevices), + } +} + +func (g *rdmaCollector) Shutdown() { +} + +func (g *rdmaCollector) Enabled() bool { + return g.enabled +} + +func (g *rdmaCollector) Setup(fra *framework.Context) { +} + +func (g *rdmaCollector) Run(stopCh <-chan struct{}) { +} + +func (g *rdmaCollector) Started() bool { + return true +} + +func (g *rdmaCollector) Infos() metriccache.Devices { + netDevices, err := GetNetDevice() + if err != nil { + klog.Errorf("failed to get net device: %v", err) + } + return netDevices +} + +func (g *rdmaCollector) GetNodeMetric() ([]metriccache.MetricSample, error) { + return nil, nil +} + +func (g *rdmaCollector) GetPodMetric(uid, podParentDir string, cs []corev1.ContainerStatus) ([]metriccache.MetricSample, error) { + return nil, nil +} + +func (g *rdmaCollector) GetContainerMetric(containerID, podParentDir string, c *corev1.ContainerStatus) ([]metriccache.MetricSample, error) { + return nil, nil +} diff 
--git a/pkg/koordlet/metricsadvisor/devices/rdma/net.go b/pkg/koordlet/metricsadvisor/devices/rdma/net.go new file mode 100644 index 000000000..b800818cc --- /dev/null +++ b/pkg/koordlet/metricsadvisor/devices/rdma/net.go @@ -0,0 +1,108 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rdma + +import ( + "fmt" + "strconv" + + "github.com/Mellanox/rdmamap" + "github.com/jaypipes/ghw" + "k8s.io/klog/v2" + + "github.com/koordinator-sh/koordinator/pkg/koordlet/metriccache" + "github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/devices/helper" + "github.com/koordinator-sh/koordinator/pkg/koordlet/util" +) + +func GetNetDevice() (metriccache.Devices, error) { + pci, err := ghw.PCI() + if err != nil { + return nil, fmt.Errorf("getNetDevice(): new PCI instance error, %v", err) + } + devices := pci.ListDevices() + if len(devices) == 0 { + klog.Warningf("getNetDevice(): no pci devices") + return nil, nil + } + var netDevices util.RDMADevices + for _, device := range devices { + if !IsNetDevice(device.Class.ID) || helper.IsSriovVF(device.Address) { + continue + } + netDevice := util.RDMADeviceInfo{ + ID: device.Address, + RDMAResources: rdmamap.GetRdmaDevicesForPcidev(device.Address), + VFEnabled: helper.SriovConfigured(device.Address), + VFMap: nil, + Minor: 0, + Labels: nil, + VendorCode: device.Vendor.ID, + DeviceCode: device.Product.ID, + BusID: device.Address, + } + if len(netDevice.RDMAResources) == 0 { + 
klog.Warningf("getNetDevice(): no rdma device for pci device %s", device.Address) + continue + } + nodeID, pcie, _, err := helper.ParsePCIInfo(device.Address) + if err != nil { + klog.Errorf("getNetDevice(): parse pci device %s error, %v", device.Address, err) + return nil, err + } + netDevice.NodeID = nodeID + netDevice.PCIE = pcie + if netDevice.VFEnabled { + vfList, err := helper.GetVFList(netDevice.ID) + if err != nil { + return nil, err + } + for _, vfBDF := range vfList { + vf := util.VirtualFunction{ + ID: vfBDF, + } + if netDevice.VFMap == nil { + netDevice.VFMap = map[string]*util.VirtualFunction{} + } + netDevice.VFMap[vf.ID] = &vf + } + } + netDevices = append(netDevices, netDevice) + } + klog.Infof("rdma netDevices: %+v", netDevices) + return netDevices, nil +} + +const ( + classIDBaseInt = 16 + classIDBitSize = 64 + netDevClassID = 0x02 +) + +func IsNetDevice(devClassID string) bool { + devClass, err := parseDeviceClassID(devClassID) + if err != nil { + klog.Warningf("getNetDevice(): unable to parse device class for device %+v %q", devClassID, err) + return false + } + return devClass == netDevClassID +} + +// parseDeviceClassID returns the device class ID parsed from the base-16 string as a 64-bit integer. +func parseDeviceClassID(deviceID string) (int64, error) { + return strconv.ParseInt(deviceID, classIDBaseInt, classIDBitSize) +} diff --git a/pkg/koordlet/metricsadvisor/plugins_profile.go b/pkg/koordlet/metricsadvisor/plugins_profile.go index dce4af89a..5d096bd7b 100644 --- a/pkg/koordlet/metricsadvisor/plugins_profile.go +++ b/pkg/koordlet/metricsadvisor/plugins_profile.go @@ -29,6 +29,7 @@ import ( "github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/collectors/podthrottled" "github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/collectors/sysresource" "github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/devices/gpu" + "github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/devices/rdma" 
"github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/framework" ) @@ -36,7 +37,8 @@ import ( var ( devicePlugins = map[string]framework.DeviceFactory{ - gpu.DeviceCollectorName: gpu.New, + gpu.DeviceCollectorName: gpu.New, + rdma.DeviceCollectorName: rdma.New, } collectorPlugins = map[string]framework.CollectorFactory{ diff --git a/pkg/koordlet/statesinformer/impl/states_device_linux.go b/pkg/koordlet/statesinformer/impl/states_device_linux.go index 04c6e9117..b89d94688 100644 --- a/pkg/koordlet/statesinformer/impl/states_device_linux.go +++ b/pkg/koordlet/statesinformer/impl/states_device_linux.go @@ -22,6 +22,8 @@ import ( "sort" "strings" + "k8s.io/utils/pointer" + "github.com/NVIDIA/go-nvml/pkg/nvml" corev1 "k8s.io/api/core/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" @@ -42,15 +44,21 @@ func (s *statesInformer) reportDevice() { klog.Errorf("node is nil") return } - gpuDevices := s.buildGPUDevice() - if len(gpuDevices) == 0 { - return - } - - gpuModel, gpuDriverVer := s.getGPUDriverAndModelFunc() - device := s.buildBasicDevice(node) - s.fillGPUDevice(device, gpuDevices, gpuModel, gpuDriverVer) + func() { + gpuDevices := s.buildGPUDevice() + if len(gpuDevices) == 0 { + return + } + gpuModel, gpuDriverVer := s.getGPUDriverAndModelFunc() + s.fillGPUDevice(device, gpuDevices, gpuModel, gpuDriverVer) + }() + func() { + rdmaDevices := s.buildRDMADevice() + if len(rdmaDevices) != 0 { + device.Spec.Devices = append(device.Spec.Devices, rdmaDevices...) 
+ } + }() err := s.updateDevice(device) if err == nil { @@ -189,6 +197,59 @@ func (s *statesInformer) buildGPUDevice() []schedulingv1alpha1.DeviceInfo { return deviceInfos } +func (s *statesInformer) buildRDMADevice() []schedulingv1alpha1.DeviceInfo { + rawRDMADevices, exist := s.metricsCache.Get(koordletuti.RDMADeviceType) + if !exist { + klog.V(4).Infof("rdma device not exist") + return nil + } + rdmaDevices := rawRDMADevices.(koordletuti.RDMADevices) + var deviceInfos []schedulingv1alpha1.DeviceInfo + for idx := range rdmaDevices { + rdma := rdmaDevices[idx] + deviceInfo := schedulingv1alpha1.DeviceInfo{ + UUID: rdma.ID, + Minor: pointer.Int32(0), + Type: schedulingv1alpha1.RDMA, + Health: true, + Resources: map[corev1.ResourceName]resource.Quantity{ + extension.ResourceRDMA: *resource.NewQuantity(100, resource.DecimalSI), + }, + Topology: &schedulingv1alpha1.DeviceTopology{ + SocketID: -1, + NodeID: rdma.NodeID, + PCIEID: rdma.PCIE, + BusID: rdma.BusID, + }, + } + if rdma.VFEnabled { + var vfs []schedulingv1alpha1.VirtualFunction + for _, vf := range rdma.VFMap { + vfs = append(vfs, schedulingv1alpha1.VirtualFunction{ + Minor: -1, + BusID: vf.ID, + }) + } + sort.Slice(vfs, func(i, j int) bool { + return vfs[i].BusID < vfs[j].BusID + }) + deviceInfo.VFGroups = append(deviceInfo.VFGroups, schedulingv1alpha1.VirtualFunctionGroup{ + Labels: nil, + VFs: vfs, + }) + } + deviceInfos = append(deviceInfos, deviceInfo) + } + + sort.Slice(deviceInfos, func(i, j int) bool { + return deviceInfos[i].UUID < deviceInfos[j].UUID + }) + for i := range deviceInfos { + deviceInfos[i].Minor = pointer.Int32(int32(i)) + } + return deviceInfos +} + func (s *statesInformer) initGPU() bool { if ret := nvml.Init(); ret != nvml.SUCCESS { if ret == nvml.ERROR_LIBRARY_NOT_FOUND { diff --git a/pkg/koordlet/statesinformer/impl/states_device_linux_test.go b/pkg/koordlet/statesinformer/impl/states_device_linux_test.go index 2150f2385..3033919f5 100644 --- 
a/pkg/koordlet/statesinformer/impl/states_device_linux_test.go +++ b/pkg/koordlet/statesinformer/impl/states_device_linux_test.go @@ -116,8 +116,19 @@ func Test_reportGPUDevice(t *testing.T) { Minor: 4, MemoryTotal: 10000, }) - + rdmaDeviceInfo := []koordletutil.RDMADeviceInfo{ + { + BusID: "0000:00:09.0", + DeviceCode: "0000", + ID: "0000:00:09.0", + Labels: map[string]string{"label1": "value1"}, + Minor: 5, + NetDev: "ib0", + NodeID: 0, + }, + } mockMetricCache.EXPECT().Get(koordletutil.GPUDeviceType).Return(gpuDeviceInfo, true) + mockMetricCache.EXPECT().Get(koordletutil.RDMADeviceType).Return(rdmaDeviceInfo, true) r.reportDevice() expectedDevices = append(expectedDevices, schedulingv1alpha1.DeviceInfo{ diff --git a/pkg/koordlet/util/device.go b/pkg/koordlet/util/device.go index eac73a7d4..f8cfeaf8c 100644 --- a/pkg/koordlet/util/device.go +++ b/pkg/koordlet/util/device.go @@ -19,7 +19,8 @@ package util type DeviceType string const ( - GPUDeviceType DeviceType = "GPU" + GPUDeviceType DeviceType = "GPU" + RDMADeviceType DeviceType = "RDMA" ) type Devices interface { @@ -42,3 +43,31 @@ type GPUDeviceInfo struct { PCIE string `json:"pcie,omitempty"` BusID string `json:"busID,omitempty"` } + +type RDMADevices []RDMADeviceInfo + +func (r RDMADevices) Type() DeviceType { + return RDMADeviceType +} + +type RDMADeviceInfo struct { + ID string `json:"id,omitempty"` + NetDev string `json:"netDev,omitempty"` + MasterNetDev *string `json:"masterNetDev,omitempty"` + RDMAResources []string `json:"rdmaResources"` + VFEnabled bool `json:"vfEnabled,omitempty"` + VFMap map[string]*VirtualFunction `json:"vfMap,omitempty"` // busId:VirtualFunction + Labels map[string]string `json:"labels,omitempty"` + Minor int32 `json:"minor"` + VendorCode string `json:"vendorCode,omitempty"` + DeviceCode string `json:"deviceCode,omitempty"` + NodeID int32 `json:"nodeID,omitempty"` + PCIE string `json:"pcie,omitempty"` + BusID string `json:"busID,omitempty"` +} + +type VirtualFunction struct { + 
ID string `json:"id,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + CustomInfo interface{} `json:"customInfo,omitempty"` +}