From cba8b1821703267cdb7d80084d1418aff47afec5 Mon Sep 17 00:00:00 2001 From: Eero Tamminen Date: Thu, 5 Sep 2024 21:42:42 +0300 Subject: [PATCH] Drop earlier resource/probe changes in common components Those changes were needed for services running on CPUs, but the original values were also used with devices, and there is now a separate top-level cpu-values.yaml file for that. Signed-off-by: Eero Tamminen --- helm-charts/common/tei/values.yaml | 15 +++----- helm-charts/common/teirerank/values.yaml | 23 +++++------- helm-charts/common/tgi/values.yaml | 45 +++++++++--------------- 3 files changed, 31 insertions(+), 52 deletions(-) diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml index 0f26cf8e0..55e71268c 100644 --- a/helm-charts/common/tei/values.yaml +++ b/helm-charts/common/tei/values.yaml @@ -48,19 +48,16 @@ service: type: ClusterIP resources: {} - # Appropriate resource requests depend on selected inferencing model, SW version and - # (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious + # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # - # Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model: # limits: - # cpu: 4 - # memory: 4Gi + # cpu: 100m + # memory: 128Mi # requests: - # cpu: 2 - # memory: 3Gi + # cpu: 100m + # memory: 128Mi livenessProbe: httpGet: @@ -69,14 +66,12 @@ livenessProbe: initialDelaySeconds: 5 periodSeconds: 5 failureThreshold: 24 - timeoutSeconds: 2 readinessProbe: httpGet: path: /health port: http initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 2 startupProbe: httpGet: path: /health diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml index 14863c7a4..d8514155c 100644 --- a/helm-charts/common/teirerank/values.yaml +++ b/helm-charts/common/teirerank/values.yaml @@ -48,35 +48,30 @@ service: type: ClusterIP resources: {} - # Appropriate resource requests depend on selected inferencing model, SW version and - # (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious + # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # - # Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model: # limits: - # cpu: 4 - # memory: 30Gi + # cpu: 100m + # memory: 128Mi # requests: - # cpu: 2 - # memory: 25Gi + # cpu: 100m + # memory: 128Mi livenessProbe: httpGet: path: /health port: http - initialDelaySeconds: 8 - periodSeconds: 8 + initialDelaySeconds: 5 + periodSeconds: 5 failureThreshold: 24 - timeoutSeconds: 4 readinessProbe: httpGet: path: /health port: http - initialDelaySeconds: 8 - periodSeconds: 8 - timeoutSeconds: 4 + initialDelaySeconds: 5 + periodSeconds: 5 startupProbe: httpGet: path: /health diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml index 5ffa0f0e7..5487e548f 100644 --- a/helm-charts/common/tgi/values.yaml +++ b/helm-charts/common/tgi/values.yaml @@ -47,67 +47,56 @@ service: type: ClusterIP resources: {} - # Appropriate resource requests depend on selected inferencing model, SW version and - # (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious + # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # - # Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3: # limits: - # cpu: 8 - # memory: 70Gi + # cpu: 100m + # memory: 128Mi # requests: - # cpu: 6 - # memory: 65Gi + # cpu: 100m + # memory: 128Mi # Use TCP probe instead of HTTP due to bug #483 # https://github.com/opea-project/GenAIExamples/issues/483 livenessProbe: tcpSocket: port: http - initialDelaySeconds: 8 - periodSeconds: 8 + initialDelaySeconds: 5 + periodSeconds: 5 failureThreshold: 24 - timeoutSeconds: 4 readinessProbe: tcpSocket: port: http - initialDelaySeconds: 16 - periodSeconds: 8 - timeoutSeconds: 4 + initialDelaySeconds: 5 + periodSeconds: 5 startupProbe: tcpSocket: port: http - initialDelaySeconds: 10 + initialDelaySeconds: 5 periodSeconds: 5 - # Startup / warmup can take over 10 min on slower / older nodes with multiple instances on same node. - # K8s restarting pod before its startup (CPU usage) finishes, does not really help... - failureThreshold: 180 - timeoutSeconds: 2 + failureThreshold: 120 #livenessProbe: # httpGet: # path: /health # port: http -# initialDelaySeconds: 8 -# periodSeconds: 8 +# initialDelaySeconds: 5 +# periodSeconds: 5 # failureThreshold: 24 -# timeoutSeconds: 4 #readinessProbe: # httpGet: # path: /health # port: http -# initialDelaySeconds: 16 -# periodSeconds: 8 -# timeoutSeconds: 4 +# initialDelaySeconds: 5 +# periodSeconds: 5 #startupProbe: # httpGet: # path: /health # port: http -# initialDelaySeconds: 10 +# initialDelaySeconds: 5 # periodSeconds: 5 -# failureThreshold: 180 -# timeoutSeconds: 2 +# failureThreshold: 120 nodeSelector: {}