Skip to content

Commit

Permalink
Drop earlier resource/probe changes in common components
Browse files Browse the repository at this point in the history
The earlier changes were needed for services running on CPUs, but
the original values were also used with devices, and there's
now a separate top-level cpu-values.yaml file for that.

Signed-off-by: Eero Tamminen <[email protected]>
  • Loading branch information
eero-t committed Sep 5, 2024
1 parent 7a06104 commit cba8b18
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 52 deletions.
15 changes: 5 additions & 10 deletions helm-charts/common/tei/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,16 @@ service:
type: ClusterIP

resources: {}
# Appropriate resource requests depend on selected inferencing model, SW version and
# (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
#
# Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model:
# limits:
# cpu: 4
# memory: 4Gi
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 2
# memory: 3Gi
# cpu: 100m
# memory: 128Mi

livenessProbe:
httpGet:
Expand All @@ -69,14 +66,12 @@ livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 24
timeoutSeconds: 2
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 2
startupProbe:
httpGet:
path: /health
Expand Down
23 changes: 9 additions & 14 deletions helm-charts/common/teirerank/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,35 +48,30 @@ service:
type: ClusterIP

resources: {}
# Appropriate resource requests depend on selected inferencing model, SW version and
# (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
#
# Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model:
# limits:
# cpu: 4
# memory: 30Gi
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 2
# memory: 25Gi
# cpu: 100m
# memory: 128Mi

livenessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 8
periodSeconds: 8
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 24
timeoutSeconds: 4
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 8
periodSeconds: 8
timeoutSeconds: 4
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
httpGet:
path: /health
Expand Down
45 changes: 17 additions & 28 deletions helm-charts/common/tgi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,67 +47,56 @@ service:
type: ClusterIP

resources: {}
# Appropriate resource requests depend on selected inferencing model, SW version and
# (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
#
# Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3:
# limits:
# cpu: 8
# memory: 70Gi
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 6
# memory: 65Gi
# cpu: 100m
# memory: 128Mi

# Use TCP probe instead of HTTP due to bug #483
# https://github.com/opea-project/GenAIExamples/issues/483
livenessProbe:
tcpSocket:
port: http
initialDelaySeconds: 8
periodSeconds: 8
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 24
timeoutSeconds: 4
readinessProbe:
tcpSocket:
port: http
initialDelaySeconds: 16
periodSeconds: 8
timeoutSeconds: 4
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
tcpSocket:
port: http
initialDelaySeconds: 10
initialDelaySeconds: 5
periodSeconds: 5
# Startup / warmup can take over 10 min on slower / older nodes with multiple instances on same node.
# K8s restarting pod before its startup (CPU usage) finishes, does not really help...
failureThreshold: 180
timeoutSeconds: 2
failureThreshold: 120
#livenessProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 8
# periodSeconds: 8
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 24
# timeoutSeconds: 4
#readinessProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 16
# periodSeconds: 8
# timeoutSeconds: 4
# initialDelaySeconds: 5
# periodSeconds: 5
#startupProbe:
# httpGet:
# path: /health
# port: http
# initialDelaySeconds: 10
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 180
# timeoutSeconds: 2
# failureThreshold: 120

nodeSelector: {}

Expand Down

0 comments on commit cba8b18

Please sign in to comment.