-
Notifications
You must be signed in to change notification settings - Fork 708
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add pytorch API and controller (#1294)
- Loading branch information
Showing
28 changed files
with
15,438 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# The following patch adds a directive for certmanager to inject CA into the CRD | ||
apiVersion: apiextensions.k8s.io/v1 | ||
kind: CustomResourceDefinition | ||
metadata: | ||
annotations: | ||
cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) | ||
name: pytorchjobs.kubeflow.org |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# The following patch enables a conversion webhook for the CRD | ||
apiVersion: apiextensions.k8s.io/v1 | ||
kind: CustomResourceDefinition | ||
metadata: | ||
name: pytorchjobs.kubeflow.org | ||
spec: | ||
conversion: | ||
strategy: Webhook | ||
webhook: | ||
clientConfig: | ||
service: | ||
namespace: system | ||
name: webhook-service | ||
path: /convert | ||
conversionReviewVersions: | ||
- v1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# permissions for end users to edit pytorchjobs. | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: ClusterRole | ||
metadata: | ||
name: pytorchjob-editor-role | ||
rules: | ||
- apiGroups: | ||
- kubeflow.org | ||
resources: | ||
- pytorchjobs | ||
verbs: | ||
- create | ||
- delete | ||
- get | ||
- list | ||
- patch | ||
- update | ||
- watch | ||
- apiGroups: | ||
- kubeflow.org | ||
resources: | ||
- pytorchjobs/status | ||
verbs: | ||
- get |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# permissions for end users to view pytorchjobs. | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: ClusterRole | ||
metadata: | ||
name: pytorchjob-viewer-role | ||
rules: | ||
- apiGroups: | ||
- kubeflow.org | ||
resources: | ||
- pytorchjobs | ||
verbs: | ||
- get | ||
- list | ||
- watch | ||
- apiGroups: | ||
- kubeflow.org | ||
resources: | ||
- pytorchjobs/status | ||
verbs: | ||
- get |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
apiVersion: kubeflow.org/v1 | ||
kind: PyTorchJob | ||
metadata: | ||
name: pytorchjob-sample | ||
spec: | ||
# Add fields here | ||
foo: bar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package v1 | ||
|
||
import ( | ||
common "github.com/kubeflow/common/pkg/apis/common/v1" | ||
) | ||
|
||
const ( | ||
// EnvKubeflowNamespace is ENV for kubeflow namespace specified by user. | ||
EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE" | ||
|
||
// DefaultPortName is name of the port used to communicate between Master and | ||
// workers. | ||
DefaultPortName = "pytorchjob-port" | ||
// DefaultContainerName is the name of the PyTorchJob container. | ||
DefaultContainerName = "pytorch" | ||
// DefaultPort is default value of the port. | ||
DefaultPort = 23456 | ||
// DefaultRestartPolicy is default RestartPolicy for PyTorchReplicaSpec. | ||
DefaultRestartPolicy = common.RestartPolicyOnFailure | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package v1 | ||
|
||
import ( | ||
"strings" | ||
|
||
common "github.com/kubeflow/common/pkg/apis/common/v1" | ||
v1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
) | ||
|
||
// Int32 is a helper routine that allocates a new int32 value | ||
// to store v and returns a pointer to it. | ||
func Int32(v int32) *int32 { | ||
return &v | ||
} | ||
|
||
func addDefaultingFuncs(scheme *runtime.Scheme) error { | ||
return RegisterDefaults(scheme) | ||
} | ||
|
||
// setDefaultPort sets the default ports for pytorch container. | ||
func setDefaultPort(spec *v1.PodSpec) { | ||
index := 0 | ||
for i, container := range spec.Containers { | ||
if container.Name == DefaultContainerName { | ||
index = i | ||
break | ||
} | ||
} | ||
|
||
hasPyTorchJobPort := false | ||
for _, port := range spec.Containers[index].Ports { | ||
if port.Name == DefaultPortName { | ||
hasPyTorchJobPort = true | ||
break | ||
} | ||
} | ||
if !hasPyTorchJobPort { | ||
spec.Containers[index].Ports = append(spec.Containers[index].Ports, v1.ContainerPort{ | ||
Name: DefaultPortName, | ||
ContainerPort: DefaultPort, | ||
}) | ||
} | ||
} | ||
|
||
func setDefaultReplicas(spec *common.ReplicaSpec) { | ||
if spec.Replicas == nil { | ||
spec.Replicas = Int32(1) | ||
} | ||
if spec.RestartPolicy == "" { | ||
spec.RestartPolicy = DefaultRestartPolicy | ||
} | ||
} | ||
|
||
// setTypeNamesToCamelCase sets the name of all replica types from any case to correct case. | ||
func setTypeNamesToCamelCase(job *PyTorchJob) { | ||
setTypeNameToCamelCase(job, PyTorchReplicaTypeMaster) | ||
setTypeNameToCamelCase(job, PyTorchReplicaTypeWorker) | ||
} | ||
|
||
// setTypeNameToCamelCase sets the name of the replica type from any case to correct case. | ||
func setTypeNameToCamelCase(job *PyTorchJob, typ PyTorchReplicaType) { | ||
for t := range job.Spec.PyTorchReplicaSpecs { | ||
if strings.EqualFold(string(t), string(typ)) && t != typ { | ||
spec := job.Spec.PyTorchReplicaSpecs[t] | ||
delete(job.Spec.PyTorchReplicaSpecs, t) | ||
job.Spec.PyTorchReplicaSpecs[typ] = spec | ||
return | ||
} | ||
} | ||
} | ||
|
||
// SetDefaults_PyTorchJob sets any unspecified values to defaults. | ||
func SetDefaults_PyTorchJob(job *PyTorchJob) { | ||
// Set default cleanpod policy to None. | ||
if job.Spec.CleanPodPolicy == nil { | ||
policy := common.CleanPodPolicyNone | ||
job.Spec.CleanPodPolicy = &policy | ||
} | ||
|
||
// Update the key of PyTorchReplicaSpecs to camel case. | ||
setTypeNamesToCamelCase(job) | ||
|
||
for rType, spec := range job.Spec.PyTorchReplicaSpecs { | ||
// Set default replicas to 1. | ||
setDefaultReplicas(spec) | ||
if rType == PyTorchReplicaTypeMaster { | ||
// Set default port to pytorch container of Master. | ||
setDefaultPort(&spec.Template.Spec) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// +k8s:deepcopy-gen=package,register | ||
// +k8s:defaulter-gen=TypeMeta | ||
// +k8s:openapi-gen=true | ||
|
||
// Package v1 is the v1 version of the API. | ||
// +groupName=kubeflow.org | ||
package v1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Copyright YEAR The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// Package v1 contains API Schema definitions for the kubeflow.org v1 API group | ||
//+kubebuilder:object:generate=true | ||
//+groupName=kubeflow.org | ||
package v1 | ||
|
||
import ( | ||
"k8s.io/apimachinery/pkg/runtime/schema" | ||
"sigs.k8s.io/controller-runtime/pkg/scheme" | ||
) | ||
|
||
var ( | ||
// GroupVersion is group version used to register these objects | ||
GroupVersion = schema.GroupVersion{Group: "kubeflow.org", Version: "v1"} | ||
|
||
// SchemeBuilder is used to add go types to the GroupVersionKind scheme | ||
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} | ||
|
||
// AddToScheme adds the types in this group-version to the given scheme. | ||
AddToScheme = SchemeBuilder.AddToScheme | ||
) |
Oops, something went wrong.