Drop earlier resource/probe changes in common components

The earlier changes were needed for services running on CPUs, but the original values were also used with devices, and there is now a separate top-level cpu-values.yaml file for the CPU-specific values.

Signed-off-by: Eero Tamminen <[email protected]>
opea-project · Sep 5, 2024 · cba8b18 · cba8b18
1 parent 7a06104
commit cba8b18
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 52 deletions.
diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml
@@ -48,19 +48,16 @@ service:
   type: ClusterIP
 
 resources: {}
-  # Appropriate resource requests depend on selected inferencing model, SW version and
-  # (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious
+  # We usually recommend not to specify default resources and to leave this as a conscious
   # choice for the user. This also increases chances charts run on environments with little
   # resources, such as Minikube. If you do want to specify resources, uncomment the following
   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  #
-  # Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model:
   # limits:
-  #   cpu: 4
-  #   memory: 4Gi
+  #   cpu: 100m
+  #   memory: 128Mi
   # requests:
-  #   cpu: 2
-  #   memory: 3Gi
+  #   cpu: 100m
+  #   memory: 128Mi
 
 livenessProbe:
   httpGet:
@@ -69,14 +66,12 @@ livenessProbe:
   initialDelaySeconds: 5
   periodSeconds: 5
   failureThreshold: 24
-  timeoutSeconds: 2
 readinessProbe:
   httpGet:
     path: /health
     port: http
   initialDelaySeconds: 5
   periodSeconds: 5
-  timeoutSeconds: 2
 startupProbe:
   httpGet:
     path: /health

diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml
@@ -48,35 +48,30 @@ service:
   type: ClusterIP
 
 resources: {}
-  # Appropriate resource requests depend on selected inferencing model, SW version and
-  # (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious
+  # We usually recommend not to specify default resources and to leave this as a conscious
   # choice for the user. This also increases chances charts run on environments with little
   # resources, such as Minikube. If you do want to specify resources, uncomment the following
   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  #
-  # Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model:
   # limits:
-  #   cpu: 4
-  #   memory: 30Gi
+  #   cpu: 100m
+  #   memory: 128Mi
   # requests:
-  #   cpu: 2
-  #   memory: 25Gi
+  #   cpu: 100m
+  #   memory: 128Mi
 
 livenessProbe:
   httpGet:
     path: /health
     port: http
-  initialDelaySeconds: 8
-  periodSeconds: 8
+  initialDelaySeconds: 5
+  periodSeconds: 5
   failureThreshold: 24
-  timeoutSeconds: 4
 readinessProbe:
   httpGet:
     path: /health
     port: http
-  initialDelaySeconds: 8
-  periodSeconds: 8
-  timeoutSeconds: 4
+  initialDelaySeconds: 5
+  periodSeconds: 5
 startupProbe:
   httpGet:
     path: /health

diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml
@@ -47,67 +47,56 @@ service:
   type: ClusterIP
 
 resources: {}
-  # Appropriate resource requests depend on selected inferencing model, SW version and
-  # (to some extent) desired scaling on underlying HW, so enabling this is left as a conscious
+  # We usually recommend not to specify default resources and to leave this as a conscious
   # choice for the user. This also increases chances charts run on environments with little
   # resources, such as Minikube. If you do want to specify resources, uncomment the following
   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  #
-  # Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3:
   # limits:
-  #   cpu: 8
-  #   memory: 70Gi
+  #   cpu: 100m
+  #   memory: 128Mi
   # requests:
-  #   cpu: 6
-  #   memory: 65Gi
+  #   cpu: 100m
+  #   memory: 128Mi
 
 # Use TCP probe instead of HTTP due to bug #483
 # https://github.com/opea-project/GenAIExamples/issues/483
 livenessProbe:
   tcpSocket:
     port: http
-  initialDelaySeconds: 8
-  periodSeconds: 8
+  initialDelaySeconds: 5
+  periodSeconds: 5
   failureThreshold: 24
-  timeoutSeconds: 4
 readinessProbe:
   tcpSocket:
     port: http
-  initialDelaySeconds: 16
-  periodSeconds: 8
-  timeoutSeconds: 4
+  initialDelaySeconds: 5
+  periodSeconds: 5
 startupProbe:
   tcpSocket:
     port: http
-  initialDelaySeconds: 10
+  initialDelaySeconds: 5
   periodSeconds: 5
-  # Startup / warmup can take over 10 min on slower / older nodes with multiple instances on same node.
-  # K8s restarting pod before its startup (CPU usage) finishes, does not really help...
-  failureThreshold: 180
-  timeoutSeconds: 2
+  failureThreshold: 120
 #livenessProbe:
 #  httpGet:
 #    path: /health
 #    port: http
-#  initialDelaySeconds: 8
-#  periodSeconds: 8
+#  initialDelaySeconds: 5
+#  periodSeconds: 5
 #  failureThreshold: 24
-#  timeoutSeconds: 4
 #readinessProbe:
 #  httpGet:
 #    path: /health
 #    port: http
-#  initialDelaySeconds: 16
-#  periodSeconds: 8
-#  timeoutSeconds: 4
+#  initialDelaySeconds: 5
+#  periodSeconds: 5
 #startupProbe:
 #  httpGet:
 #    path: /health
 #    port: http
-#  initialDelaySeconds: 10
+#  initialDelaySeconds: 5
 #  periodSeconds: 5
-#  failureThreshold: 180
-#  timeoutSeconds: 2
+#  failureThreshold: 120
 
 nodeSelector: {}