Skip to content

Commit

Permalink
api: supply id for minor meaningless device (#2250)
Browse files Browse the repository at this point in the history
Signed-off-by: wangjianyu.wjy <[email protected]>
Co-authored-by: wangjianyu.wjy <[email protected]>
  • Loading branch information
ferris-cx and wangjianyu.wjy authored Nov 28, 2024
1 parent 6fa0388 commit 13a7107
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 13 deletions.
22 changes: 20 additions & 2 deletions apis/extension/device_share.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,32 @@ const (
"koordinator.sh/gpu-memory": "16Gi"
}
}
],
"rdma": [
{
"minor": 0,
"id": "0000:09:00.0",
"resources": {
"koordinator.sh/rdma": 100,
}
},
{
"minor": 1,
"id": "0000:10:00.0",
"resources": {
"koordinator.sh/rdma": 100,
}
}
]
}
*/
type DeviceAllocations map[schedulingv1alpha1.DeviceType][]*DeviceAllocation

type DeviceAllocation struct {
Minor int32 `json:"minor"`
Resources corev1.ResourceList `json:"resources"`
Minor int32 `json:"minor"`
Resources corev1.ResourceList `json:"resources"`
// ID is the well known identifier for device, because for some device, such as rdma, Minor is meaningless
ID string `json:"id,omitempty"`
Extension *DeviceAllocationExtension `json:"extension,omitempty"`
}

Expand Down
27 changes: 27 additions & 0 deletions apis/extension/device_share_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,33 @@ func Test_GetDeviceAllocations(t *testing.T) {
},
wantErr: false,
},
{
name: "correct annotations with busID",
pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "123456789",
Namespace: "default",
Name: "test",
Annotations: map[string]string{
AnnotationDeviceAllocated: `{"gpu":[{"minor":1,"id": "0000:08.00.0","resources":{"koordinator.sh/gpu-core":"100","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"100"}}]}`,
},
},
},
want: DeviceAllocations{
schedulingv1alpha1.GPU: []*DeviceAllocation{
{
Minor: 1,
ID: "0000:08.00.0",
Resources: corev1.ResourceList{
ResourceGPUCore: resource.MustParse("100"),
ResourceGPUMemoryRatio: resource.MustParse("100"),
ResourceGPUMemory: resource.MustParse("16Gi"),
},
},
},
},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,34 +218,54 @@ resources:

##### DeviceAllocation

In the PreBind stage, the scheduler will update the device (including GPU) allocation results, including the device's Minor and resource allocation information, to the Pod in the form of annotations.
In the PreBind stage, the scheduler will update the device (including GPU) allocation results, including the device's Minor, ID, resource allocation information, to the Pod in the form of annotations.

```go
/*
{
"gpu": [
{
"minor": 0,
"resouurces": {
"resources": {
"koordinator.sh/gpu-core": 100,
"koordinator.sh/gpu-memory-ratio": 100,
"koordinator.sh/gpu-memory": "16Gi"
}
},
{
"minor": 1,
"resouurces": {
"resources": {
"koordinator.sh/gpu-core": 100,
"koordinator.sh/gpu-memory-ratio": 100,
"koordinator.sh/gpu-memory": "16Gi"
}
}
],
"rdma": [
{
"minor": 0,
"id": "0000:09:00.0",
"resources": {
"koordinator.sh/rdma": 100,
}
},
{
"minor": 1,
"id": "0000:10:00.0",
"resources": {
"koordinator.sh/rdma": 100,
}
}
]
}
*/
type DeviceAllocation struct {
Minor int32
Resources map[string]resource.Quantity
Minor int32 `json:"minor"`
Resources corev1.ResourceList `json:"resources"`
// ID is the well known identifier for device, because for some device, such as rdma, Minor is meaningless
ID string `json:"id,omitempty"`
Extension *DeviceAllocationExtension `json:"extension,omitempty"`
}

type DeviceAllocations map[DeviceType][]*DeviceAllocation
Expand Down
34 changes: 34 additions & 0 deletions pkg/scheduler/plugins/deviceshare/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"k8s.io/apimachinery/pkg/types"
quotav1 "k8s.io/apiserver/pkg/quota/v1"
k8sfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/scheduler/framework"

apiext "github.com/koordinator-sh/koordinator/apis/extension"
Expand Down Expand Up @@ -540,12 +541,45 @@ func (p *Plugin) preBindObject(ctx context.Context, cycleState *framework.CycleS
return nil
}

err := p.fillID(state.allocationResult, nodeName)
if err != nil {
return framework.AsStatus(err)
}

if err := apiext.SetDeviceAllocations(object, state.allocationResult); err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
return nil
}

func (p *Plugin) fillID(allocationResult apiext.DeviceAllocations, nodeName string) error {
extendedHandle, ok := p.handle.(frameworkext.ExtendedHandle)
if !ok {
return fmt.Errorf("expect handle to be type frameworkext.ExtendedHandle, got %T", p.handle)
}
deviceLister := extendedHandle.KoordinatorSharedInformerFactory().Scheduling().V1alpha1().Devices().Lister()
device, err := deviceLister.Get(nodeName)
if err != nil {
klog.ErrorS(err, "Failed to get Device", "node", nodeName)
return err
}
for deviceType, allocations := range allocationResult {
if deviceType == schedulingv1alpha1.GPU {
// because gpu minor is well known, ID isn't needed
continue
}
for i, allocation := range allocations {
for _, info := range device.Spec.Devices {
if info.Minor != nil && *info.Minor == allocation.Minor && info.Type == deviceType {
allocationResult[deviceType][i].ID = info.UUID
break
}
}
}
}
return nil
}

func updateReservationAllocatable(cycleState *framework.CycleState, reservation *schedulingv1alpha1.Reservation) *framework.Status {
state, status := getPreFilterState(cycleState)
if !status.IsSuccess() {
Expand Down
35 changes: 29 additions & 6 deletions pkg/scheduler/plugins/deviceshare/plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3731,7 +3731,8 @@ func Test_Plugin_PreBind(t *testing.T) {
Name: "test-container-a",
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
apiext.ResourceGPU: resource.MustParse("2"),
apiext.ResourceGPU: resource.MustParse("2"),
apiext.ResourceRDMA: resource.MustParse("100"),
},
},
},
Expand All @@ -3746,6 +3747,7 @@ func Test_Plugin_PreBind(t *testing.T) {
name string
args args
wantPod *corev1.Pod
deviceCR *schedulingv1alpha1.Device
wantStatus *framework.Status
}{
{
Expand All @@ -3767,7 +3769,7 @@ func Test_Plugin_PreBind(t *testing.T) {
wantPod: &corev1.Pod{},
},
{
name: "pre-bind successfully",
name: "pre-bind successfully, deviceCR with topology",
args: args{
pod: testPod.DeepCopy(),
state: &preFilterState{
Expand All @@ -3791,23 +3793,35 @@ func Test_Plugin_PreBind(t *testing.T) {
},
},
},
schedulingv1alpha1.RDMA: {
{
Minor: 1,
Resources: corev1.ResourceList{
apiext.ResourceRDMA: resource.MustParse("100"),
},
},
},
},
podRequests: map[schedulingv1alpha1.DeviceType]corev1.ResourceList{
schedulingv1alpha1.GPU: {
apiext.ResourceGPUCore: resource.MustParse("200"),
apiext.ResourceGPUMemoryRatio: resource.MustParse("200"),
apiext.ResourceGPUMemory: resource.MustParse("32Gi"),
},
schedulingv1alpha1.RDMA: {
apiext.ResourceRDMA: resource.MustParse("100"),
},
},
},
},
deviceCR: fakeDeviceCR,
wantPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "123456789",
Namespace: "default",
Name: "test",
Annotations: map[string]string{
apiext.AnnotationDeviceAllocated: `{"gpu":[{"minor":0,"resources":{"koordinator.sh/gpu-core":"100","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"100"}},{"minor":1,"resources":{"koordinator.sh/gpu-core":"100","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"100"}}]}`,
apiext.AnnotationDeviceAllocated: `{"gpu":[{"minor":0,"resources":{"koordinator.sh/gpu-core":"100","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"100"}},{"minor":1,"resources":{"koordinator.sh/gpu-core":"100","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"100"}}],"rdma":[{"minor":1,"resources":{"koordinator.sh/rdma":"100"},"id":"0000:1f:00.0"}]}`,
},
},
Spec: corev1.PodSpec{
Expand All @@ -3817,7 +3831,8 @@ func Test_Plugin_PreBind(t *testing.T) {
Name: "test-container-a",
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
apiext.ResourceGPU: resource.MustParse("2"),
apiext.ResourceGPU: resource.MustParse("2"),
apiext.ResourceRDMA: resource.MustParse("100"),
},
},
},
Expand All @@ -3831,6 +3846,11 @@ func Test_Plugin_PreBind(t *testing.T) {
suit := newPluginTestSuit(t, nil)
_, err := suit.ClientSet().CoreV1().Pods(testPod.Namespace).Create(context.TODO(), testPod, metav1.CreateOptions{})
assert.NoError(t, err)
if tt.deviceCR != nil {
_, err = suit.koordClientSet.SchedulingV1alpha1().Devices().Create(context.TODO(), tt.deviceCR, metav1.CreateOptions{})
assert.NoError(t, err)
}

pl, err := suit.proxyNew(getDefaultArgs(), suit.Framework)
assert.NoError(t, err)

Expand All @@ -3843,7 +3863,7 @@ func Test_Plugin_PreBind(t *testing.T) {
if tt.args.state != nil {
cycleState.Write(stateKey, tt.args.state)
}
status := pl.(*Plugin).PreBind(context.TODO(), cycleState, tt.args.pod, "test-node")
status := pl.(*Plugin).PreBind(context.TODO(), cycleState, tt.args.pod, fakeDeviceCR.Name)
assert.Equal(t, tt.wantStatus, status)
assert.Equal(t, tt.wantPod, tt.args.pod)
})
Expand Down Expand Up @@ -3906,6 +3926,9 @@ func Test_Plugin_PreBindReservation(t *testing.T) {
_, err := suit.koordClientSet.SchedulingV1alpha1().Reservations().Create(context.TODO(), reservation, metav1.CreateOptions{})
assert.NoError(t, err)

_, err = suit.koordClientSet.SchedulingV1alpha1().Devices().Create(context.TODO(), fakeDeviceCRWithoutTopology, metav1.CreateOptions{})
assert.NoError(t, err)

pl, err := suit.proxyNew(getDefaultArgs(), suit.Framework)
assert.NoError(t, err)

Expand All @@ -3916,7 +3939,7 @@ func Test_Plugin_PreBindReservation(t *testing.T) {

cycleState := framework.NewCycleState()
cycleState.Write(stateKey, state)
status := pl.(*Plugin).PreBindReservation(context.TODO(), cycleState, reservation, "test-node")
status := pl.(*Plugin).PreBindReservation(context.TODO(), cycleState, reservation, fakeDeviceCRWithoutTopology.Name)
assert.True(t, status.IsSuccess())

allocations, err := apiext.GetDeviceAllocations(reservation.Annotations)
Expand Down

0 comments on commit 13a7107

Please sign in to comment.