Skip to content

Commit

Permalink
feat: add qwen preset test (#788)
Browse files Browse the repository at this point in the history
**Reason for Change**:

- remove outdated manifests
- replace LoadBalancer with ClusterIP, since the service IP is no longer accessed directly

---------

Signed-off-by: jerryzhuang <[email protected]>
Co-authored-by: Fei Guo <[email protected]>
  • Loading branch information
zhuangqh and Fei-Guo authored Dec 20, 2024
1 parent 2c1d5bf commit be3620b
Show file tree
Hide file tree
Showing 25 changed files with 234 additions and 133 deletions.
9 changes: 9 additions & 0 deletions .github/e2e-preset-configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@
"OSS": true,
"loads_adapter": false
},
{
"name": "qwen2.5-coder-7b-instruct",
"workload": "qwen2-5-coder-7b-instruct",
"node-count": 1,
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100,
"OSS": true,
"loads_adapter": false
},
{
"name": "llama-2-7b",
"node-count": 1,
Expand Down
54 changes: 31 additions & 23 deletions .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ jobs:
run: |
NAME_SUFFIX=${{ matrix.model.name }}
NAME_SUFFIX_WITHOUT_DASHES=${NAME_SUFFIX//-/} # Removing all '-' symbols
NAME_SUFFIX_WITHOUT_DASHES=${NAME_SUFFIX_WITHOUT_DASHES//./} # Removing all '.' symbols
if [ ${#NAME_SUFFIX_WITHOUT_DASHES} -gt 12 ]; then
TRUNCATED_NAME_SUFFIX=${NAME_SUFFIX_WITHOUT_DASHES:0:12}
Expand Down Expand Up @@ -213,16 +214,21 @@ jobs:
fi
fi
- name: Get testing workload
id: workload
run: |
WORKLOAD_NAME=${{ matrix.model.workload || matrix.model.name }}
echo "WORKLOAD_NAME=$WORKLOAD_NAME" >> $GITHUB_OUTPUT
echo "WORKLOAD_FILE_PREFIX=presets/workspace/test/manifests/$WORKLOAD_NAME/$WORKLOAD_NAME" >> $GITHUB_OUTPUT
- name: Create Service
run: kubectl apply -f presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-service.yaml
run: |
kubectl apply -f ${{steps.workload.outputs.WORKLOAD_FILE_PREFIX}}-service.yaml
- name: Retrieve External Service IP
id: get_ip
run: |
while [[ -z $SERVICE_IP ]]; do
SERVICE_IP=$(kubectl get svc ${{ matrix.model.name }} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}')
sleep 5
done
SERVICE_IP=$(kubectl get svc ${{steps.workload.outputs.WORKLOAD_NAME}} -o=jsonpath='{.spec.clusterIP}')
echo "Service IP is $SERVICE_IP"
echo "SERVICE_IP=$SERVICE_IP" >> $GITHUB_OUTPUT
Expand All @@ -235,36 +241,38 @@ jobs:
- name: Replace IP and Deploy Resource to K8s
run: |
POSTFIX=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "" || echo "_${{ env.RUNTIME }}")
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
kubectl apply -f presets/workspace/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}$POSTFIX.yaml
WORKLOAD_FILE=${{steps.workload.outputs.WORKLOAD_FILE_PREFIX}}$POSTFIX.yaml
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" $WORKLOAD_FILE
sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" $WORKLOAD_FILE
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" $WORKLOAD_FILE
kubectl apply -f $WORKLOAD_FILE
- name: Wait for Resource to be ready
run: |
kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} --timeout=1800s
kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} --timeout=1800s
- name: Check Adapter Loading from Logs
if: matrix.model.loads_adapter == true
run: |
POD_NAME=$(kubectl get pods -l app=${{ matrix.model.name }} -o jsonpath="{.items[0].metadata.name}")
POD_NAME=$(kubectl get pods -l app=${{steps.workload.outputs.WORKLOAD_NAME}} -o jsonpath="{.items[0].metadata.name}")
kubectl logs $POD_NAME | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)
- name: Install testing commands
run: |
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- apt-get update
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- apt-get install -y curl
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- apt-get update
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- apt-get install -y curl
- name: Test healthz endpoint
run: |
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
curl -s http://localhost:5000/health
- name: Test inference endpoint
run: |
echo "Testing inference for ${{ matrix.model.name }}"
if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
curl -s -X POST \
-H "Content-Type: application/json" \
-d '{
Expand All @@ -285,7 +293,7 @@ jobs:
}' \
http://localhost:5000/chat
elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
curl -s -X POST \
-H "Content-Type: application/json" \
-d '{
Expand All @@ -301,7 +309,7 @@ jobs:
}' \
http://localhost:5000/generate
elif [[ "${{ env.RUNTIME }}" == *"vllm"* ]]; then
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
curl -s -X POST \
-H "accept: application/json" \
-H "Content-Type: application/json" \
Expand All @@ -320,7 +328,7 @@ jobs:
}' \
http://localhost:5000/v1/chat/completions
else
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} -- \
kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
curl -s -X POST \
-H "accept: application/json" \
-H "Content-Type: application/json" \
Expand Down Expand Up @@ -367,15 +375,15 @@ jobs:
RESOURCE_TYPE=${{ steps.resource.outputs.RESOURCE_TYPE }}
# Check and Delete K8s Resource (Deployment or StatefulSet)
if kubectl get $RESOURCE_TYPE ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl logs $RESOURCE_TYPE/${{ matrix.model.name }}
kubectl delete $RESOURCE_TYPE ${{ matrix.model.name }}
if kubectl get $RESOURCE_TYPE ${{steps.workload.outputs.WORKLOAD_NAME}} > /dev/null 2>&1; then
kubectl logs $RESOURCE_TYPE/${{steps.workload.outputs.WORKLOAD_NAME}}
kubectl delete $RESOURCE_TYPE ${{steps.workload.outputs.WORKLOAD_NAME}}
fi
fi
# Check and Delete K8s Service if it exists
if kubectl get svc ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl delete svc ${{ matrix.model.name }}
if kubectl get svc ${{steps.workload.outputs.WORKLOAD_NAME}} > /dev/null 2>&1; then
kubectl delete svc ${{steps.workload.outputs.WORKLOAD_NAME}}
fi
# Check and Delete AKS Nodepool if it exists
Expand Down
54 changes: 26 additions & 28 deletions presets/workspace/models/supported_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,14 @@ models:
type: text-generation
version: https://huggingface.co/tiiuae/falcon-7b/commit/898df1396f35e447d5fe44e0a3ccaaaa69f30d36
runtime: tfs
tag: 0.0.7
tag: 0.0.8
- name: falcon-7b-instruct
type: text-generation
version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99
runtime: tfs
tag: 0.0.7
tag: 0.0.8
# Tag history:
# 0.0.8 - Support adapter and config file for VLLM runtime
# 0.0.7 - Support VLLM runtime
# 0.0.6 - Add Logging & Metrics Server
# 0.0.5 - Tuning and Adapters
Expand All @@ -53,13 +54,14 @@ models:
type: text-generation
version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146
runtime: tfs
tag: 0.0.8
tag: 0.0.9
- name: falcon-40b-instruct
type: text-generation
version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f
runtime: tfs
tag: 0.0.8
tag: 0.0.9
# Tag history for 40b models:
# 0.0.9 - Support adapter and config file for VLLM runtime
# 0.0.8 - Support VLLM runtime
# 0.0.7 - Add Logging & Metrics Server
# 0.0.6 - Tuning and Adapters
Expand All @@ -74,13 +76,14 @@ models:
type: text-generation
version: https://huggingface.co/mistralai/Mistral-7B-v0.3/commit/d8cadc02ac76bd617a919d50b092e59d2d110aff
runtime: tfs
tag: 0.0.8
tag: 0.0.9
- name: mistral-7b-instruct
type: text-generation
version: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/commit/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db
runtime: tfs
tag: 0.0.8
tag: 0.0.9
# Tag history:
# 0.0.9 - Support adapter and config file for VLLM runtime
# 0.0.8 - Support VLLM runtime
# 0.0.7 - Add Logging & Metrics Server
# 0.0.6 - Update model version and Address missing weights files fix
Expand All @@ -95,8 +98,9 @@ models:
type: text-generation
version: https://huggingface.co/microsoft/phi-2/commit/ef382358ec9e382308935a992d908de099b64c23
runtime: tfs
tag: 0.0.6
tag: 0.0.7
# Tag history:
# 0.0.7 - Support adapter and config file for VLLM runtime
# 0.0.6 - Support VLLM runtime
# 0.0.5 - Add Logging & Metrics Server
# 0.0.4 - Tuning and Adapters
Expand All @@ -109,38 +113,24 @@ models:
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/commit/0a67737cc96d2554230f90338b163bc6380a2a85
runtime: tfs
tag: 0.0.3
# Tag history:
# 0.0.3 - Support VLLM runtime
# 0.0.2 - Add Logging & Metrics Server
# 0.0.1 - Initial Release

tag: 0.0.4
- name: phi-3-mini-128k-instruct
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/commit/a90b62ae09941edff87a90ced39ba5807e6b2ade
runtime: tfs
tag: 0.0.3
# Tag history:
# 0.0.3 - Support VLLM runtime
# 0.0.2 - Add Logging & Metrics Server
# 0.0.1 - Initial Release

tag: 0.0.4
- name: phi-3-medium-4k-instruct
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-medium-4k-instruct/commit/ae004ae82eb6eddc32906dfacb1d6dfea8f91996
runtime: tfs
tag: 0.0.3
# Tag history:
# 0.0.3 - Support VLLM runtime
# 0.0.2 - Add Logging & Metrics Server
# 0.0.1 - Initial Release

tag: 0.0.4
- name: phi-3-medium-128k-instruct
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-medium-128k-instruct/commit/fa7d2aa4f5ea69b2e36b20d050cdae79c9bfbb3f
runtime: tfs
tag: 0.0.3
tag: 0.0.4
# Tag history:
# 0.0.4 - Support adapter and config file for VLLM runtime
# 0.0.3 - Support VLLM runtime
# 0.0.2 - Add Logging & Metrics Server
# 0.0.1 - Initial Release
Expand All @@ -149,7 +139,15 @@ models:
type: text-generation
version: https://huggingface.co/microsoft/Phi-3.5-mini-instruct/commit/af0dfb8029e8a74545d0736d30cb6b58d2f0f3f0
runtime: tfs
tag: 0.0.1
tag: 0.0.2
# Tag history:
# 0.0.2 - Support adapter and config file for VLLM runtime
# 0.0.1 - New Model! Support VLLM Runtime


- name: qwen2.5-coder-7b-instruct
type: text-generation
version: https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct/commit/0eb6b1ed2d0c4306bc637d09ecef51e59d3dfe05
runtime: tfs
tag: 0.0.1
# Tag history:
# 0.0.1 - New Model!
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ spec:
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ spec:
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ spec:
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ spec:
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ spec:
protocol: TCP
port: 29500
targetPort: 29500
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ spec:
protocol: TCP
port: 29500
targetPort: 29500
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ spec:
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ spec:
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
type: ClusterIP
publishNotReadyAddresses: true
14 changes: 0 additions & 14 deletions presets/workspace/test/manifests/llama-headless.yaml

This file was deleted.

Loading

0 comments on commit be3620b

Please sign in to comment.