Add vLLM support for DocSum
Signed-off-by: Eero Tamminen <[email protected]>
eero-t committed Jan 2, 2025
1 parent 68e7d06 commit 7491ab7
Showing 8 changed files with 61 additions and 4 deletions.
5 changes: 5 additions & 0 deletions helm-charts/docsum/Chart.yaml
@@ -9,6 +9,11 @@ dependencies:
  - name: tgi
    version: 0-latest
    repository: "file://../common/tgi"
    condition: tgi.enabled
  - name: vllm
    version: 0-latest
    repository: "file://../common/vllm"
    condition: vllm.enabled
  - name: llm-uservice
    version: 0-latest
    repository: "file://../common/llm-uservice"
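The new `condition:` entries make TGI and vLLM mutually optional serving subcharts that are switched purely through values. A minimal sketch of exercising that switch, assuming the chart is installed from the `docsum` directory as in the README (the commit itself only defines the conditions):

```console
# Refresh the bundled subcharts so the new vllm dependency is pulled in,
# then render the chart with vLLM selected to confirm which backend the
# condition flags enable. Paths and release name follow the README example.
helm dependency update docsum
helm template docsum docsum --set tgi.enabled=false --set vllm.enabled=true | less
```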
6 changes: 4 additions & 2 deletions helm-charts/docsum/README.md
@@ -16,8 +16,10 @@ export HFTOKEN="insert-your-huggingface-token-here"
export MODELDIR="/mnt/opea-models"
export MODELNAME="Intel/neural-chat-7b-v3-3"
helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
# To use Gaudi device
# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-values.yaml
# To use Gaudi device with TGI
# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-tgi-values.yaml ...
# To use Gaudi device with vLLM
# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-vllm-values.yaml ...
```

## Verify
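Before running the README's Verify steps, it can help to wait for the release to become ready. A small sketch, assuming the charts apply the standard Helm instance label (not something this diff shows):

```console
# Wait until all DocSum pods report Ready, then list them; the label
# selector is an assumption based on standard Helm chart labels.
kubectl wait pod --for=condition=Ready \
  -l app.kubernetes.io/instance=docsum --timeout=15m
kubectl get pods -l app.kubernetes.io/instance=docsum
```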
1 change: 1 addition & 0 deletions helm-charts/docsum/ci-gaudi-tgi-values.yaml
1 change: 0 additions & 1 deletion helm-charts/docsum/ci-gaudi-values.yaml

This file was deleted.

1 change: 1 addition & 0 deletions helm-charts/docsum/ci-gaudi-vllm-values.yaml
File renamed without changes.
45 changes: 45 additions & 0 deletions helm-charts/docsum/gaudi-vllm-values.yaml
@@ -0,0 +1,45 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

tgi:
  enabled: false

llm-uservice:
  image:
    repository: opea/llm-docsum-vllm
    tag: "latest"

vllm:
  enabled: true
  image:
    repository: opea/vllm-gaudi
    tag: "latest"
  resources:
    limits:
      habana.ai/gaudi: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1

  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"

  extraCmdArgs: [
    "--tensor-parallel-size", "1",
    "--block-size", "128",
    "--max-num-seqs", "256",
    "--max-seq_len-to-capture", "2048"
  ]
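Once the Gaudi values are applied, the vLLM backend can be smoke-tested directly through its OpenAI-compatible API. A sketch, assuming a service named `docsum-vllm` exposed on port 80 and the default model from values.yaml; neither name is stated in this commit:

```console
# Forward the (assumed) vLLM service locally, then list served models and
# request a short completion via vLLM's OpenAI-compatible endpoints.
kubectl port-forward svc/docsum-vllm 8000:80 &
curl http://localhost:8000/v1/models
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "Summarize: OPEA DocSum adds vLLM support.", "max_tokens": 32}'
```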
6 changes: 5 additions & 1 deletion helm-charts/docsum/values.yaml
@@ -64,8 +64,12 @@ llm-uservice:
  MAX_TOTAL_TOKENS: "2048"
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

# To override values in subchart tgi
# To override values in tgi/vllm subcharts
tgi:
  enabled: true
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
vllm:
  enabled: false
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
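With TGI enabled and vLLM disabled by default, switching an existing release to vLLM is mostly a matter of flipping the two flags. A hedged sketch; reusing the `opea/llm-docsum-vllm` wrapper image outside of the Gaudi values file is an assumption, not something this commit configures:

```console
# Flip the serving backend of an already installed release from TGI to vLLM.
# The llm-uservice image override mirrors gaudi-vllm-values.yaml and may not
# apply to every platform.
helm upgrade docsum docsum \
  --set tgi.enabled=false \
  --set vllm.enabled=true \
  --set llm-uservice.image.repository=opea/llm-docsum-vllm
```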
