-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for vLLM containers (#16)
This commit adds support for vLLM containers with some example documentation to support it. The schema has been updated to allow for the vLLM container. These changes help toward the migration to the v2.x architecture.
- Loading branch information
1 parent
646e0fa
commit b3bf672
Showing
6 changed files
with
84 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Base image is supplied at build time (--build-arg BASE_IMAGE=...).
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

##### DOWNLOAD MOUNTPOINTS S3
# URL of the mount-s3 .deb package, supplied at build time
# (--build-arg MOUNTS3_DEB_URL=...).
ARG MOUNTS3_DEB_URL
# One layer: install the download/sync tools, fetch and install the package,
# then remove the .deb and the apt lists so they do not persist in the image.
# - apt-get is used instead of apt, whose CLI is not meant for scripts.
# - ca-certificates is listed explicitly because --no-install-recommends would
#   otherwise skip it and the HTTPS download could fail.
# - wget -O pins the output filename so the following steps do not depend on
#   the last path segment of the URL.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends ca-certificates rsync wget && \
    wget -O mount-s3.deb "${MOUNTS3_DEB_URL}" && \
    apt-get install -y ./mount-s3.deb && \
    rm -f mount-s3.deb && \
    rm -rf /var/lib/apt/lists/*

# Entrypoint script is copied relative to the base image's working directory.
COPY src/entrypoint.sh ./entrypoint.sh
RUN chmod +x entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash
# Container entrypoint: mounts the model bucket with mount-s3, copies the model
# files to a local path with parallel rsync, then starts the vLLM
# OpenAI-compatible API server on port 8080.
#
# Required environment variables:
#   S3_BUCKET_MODELS  bucket passed to mount-s3
#   S3_MOUNT_POINT    directory where the bucket is mounted
#   MODEL_NAME        sub-directory of the mount holding the model; also the served model name
#   LOCAL_MODEL_PATH  local directory the model files are copied into
#   THREADS           number of parallel rsync processes
# Optional:
#   MAX_TOTAL_TOKENS  forwarded to vLLM as --max-model-len

# Abort on the first failing command, including failures inside a pipeline.
set -eo pipefail

declare -a vars=("S3_BUCKET_MODELS" "LOCAL_MODEL_PATH" "MODEL_NAME" "S3_MOUNT_POINT" "THREADS")

# Check the necessary environment variables
for var in "${vars[@]}"; do
    if [[ -z "${!var}" ]]; then
        echo "$var must be set"
        exit 1
    fi
done

# Create S3 mount point to ephemeral NVMe drive
echo "Creating S3 mountpoint for bucket ${S3_BUCKET_MODELS} at container mount point path ${S3_MOUNT_POINT}/${MODEL_NAME}"
mkdir -p "${S3_MOUNT_POINT}"
mount-s3 "${S3_BUCKET_MODELS}" "${S3_MOUNT_POINT}"

echo "Downloading model ${S3_BUCKET_MODELS} to container path ${LOCAL_MODEL_PATH}"
mkdir -p "${LOCAL_MODEL_PATH}"

# Use rsync with S3_MOUNT_POINT
# Top-level entries of the model directory are copied by up to THREADS rsync
# processes at once. Entries are passed NUL-delimited so names containing
# spaces or quotes survive; dotfiles are skipped and "*.bin" files excluded.
find "${S3_MOUNT_POINT}/${MODEL_NAME}" -mindepth 1 -maxdepth 1 ! -name '.*' -print0 \
    | xargs -0 -P "${THREADS}" -I% rsync -Pa --exclude "*.bin" % "${LOCAL_MODEL_PATH}/"

# Optional server arguments are collected in an array so each one reaches the
# server as a separate, intact word.
ADDITIONAL_ARGS=()
if [[ -n "${MAX_TOTAL_TOKENS}" ]]; then
    ADDITIONAL_ARGS+=(--max-model-len "${MAX_TOTAL_TOKENS}")
fi

# Start the webserver
echo "Starting vLLM"
# exec replaces this shell with the server process, so signals sent to the
# container's main process are delivered to the server itself.
exec python3 -m vllm.entrypoints.openai.api_server \
    --model "${LOCAL_MODEL_PATH}" \
    --served-model-name "${MODEL_NAME}" \
    --port 8080 "${ADDITIONAL_ARGS[@]}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters