From dd31cf9ba8e5e25eb9805afcc0ee4b724b0980ea Mon Sep 17 00:00:00 2001
From: Alain Krok
Date: Thu, 1 Feb 2024 11:22:41 -0600
Subject: [PATCH] chore(documentation): update documentation

---
 docs/.vitepress/config.mts               |  1 +
 docs/documentation/self-hosted-models.md | 32 ++++++++
 lib/sagemaker-model/README.md            | 97 ------------------------
 3 files changed, 33 insertions(+), 97 deletions(-)
 create mode 100644 docs/documentation/self-hosted-models.md
 delete mode 100644 lib/sagemaker-model/README.md

diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts
index c595cc5ab..60e89929b 100644
--- a/docs/.vitepress/config.mts
+++ b/docs/.vitepress/config.mts
@@ -46,6 +46,7 @@ export default defineConfig({
         items: [
           { text: 'Private Chatbot', link: '/documentation/private-chatbot' },
           { text: 'Model Requirements', link: '/documentation/model-requirements' },
+          { text: 'Self-hosted models', link: '/documentation/self-hosted-models' },
           { text: 'Inference Script', link: '/documentation/inference-script' },
           { text: 'Document Retrieval', link: '/documentation/retriever' },
           { text: 'AppSync', link: '/documentation/appsync' },
diff --git a/docs/documentation/self-hosted-models.md b/docs/documentation/self-hosted-models.md
new file mode 100644
index 000000000..375ba1d49
--- /dev/null
+++ b/docs/documentation/self-hosted-models.md
@@ -0,0 +1,32 @@
+# SageMaker Model Constructs
+
+This project provides multiple CDK constructs to help facilitate the deployment of models to Amazon SageMaker:
+- [SageMaker JumpStart](https://github.com/awslabs/generative-ai-cdk-constructs/blob/main/src/patterns/gen-ai/aws-model-deployment-sagemaker/README_jumpstart.md): Deploy a foundation model from Amazon SageMaker JumpStart to an Amazon SageMaker endpoint.
+- [Hugging Face](https://github.com/awslabs/generative-ai-cdk-constructs/blob/main/src/patterns/gen-ai/aws-model-deployment-sagemaker/README_hugging_face.md): Deploy a foundation model from Hugging Face to an Amazon SageMaker endpoint (models supported by the [HuggingFace LLM Inference container](https://huggingface.co/blog/sagemaker-huggingface-llm)).
+- [Custom model](https://github.com/awslabs/generative-ai-cdk-constructs/blob/main/src/patterns/gen-ai/aws-model-deployment-sagemaker/README_custom_sagemaker_endpoint.md): Deploy a foundation model from an S3 location to an Amazon SageMaker endpoint.
+
+These constructs can be consumed separately through the [Generative AI CDK Constructs](https://github.com/awslabs/generative-ai-cdk-constructs) library.
+
+You can see examples in the [lib/models/index.ts](https://github.com/aws-samples/aws-genai-llm-chatbot/blob/main/lib/models/index.ts) file demonstrating how to deploy several models, such as Llama 2 13B Chat, Mixtral 8x7B, or IDEFICS.
+
+For additional samples demonstrating how to deploy models using these constructs, you can refer to the related [samples repository](https://github.com/aws-samples/generative-ai-cdk-constructs-samples).
+
+### Custom inference code
+
+While the options above are preferred, for broader compatibility the sample also showcases deployment of any other Hugging Face model not supported by the HuggingFace LLM Inference container, using custom inference code. This process is powered by AWS CodeBuild.
+
+For this kind of deployment, you need to choose the right container for your model from [this list of AWS Deep Learning Containers](https://github.com/aws/deep-learning-containers/blob/master/available_images.md), based on the PyTorch/Transformers versions, Python version, and so on.
+An example of how to use this construct is available [here](https://github.com/aws-samples/aws-genai-llm-chatbot/tree/main/lib/rag-engines/sagemaker-rag-models); a minimal sketch is shown below.
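+
+The following sketch (not taken verbatim from the repository) illustrates what such a deployment can look like with the repository's `SageMakerModel` construct; the import path, code folder, and container image below are placeholder assumptions to adapt to your own setup:
+
+```typescript
+import * as cdk from 'aws-cdk-lib';
+import * as ec2 from 'aws-cdk-lib/aws-ec2';
+import * as codebuild from 'aws-cdk-lib/aws-codebuild';
+// Assumed import path: in this repository the construct lives under lib/sagemaker-model.
+import { SageMakerModel, DeploymentType } from './lib/sagemaker-model';
+
+class EmbeddingsModelStack extends cdk.Stack {
+  constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {
+    super(scope, id, props);
+
+    const vpc = new ec2.Vpc(this, 'Vpc');
+
+    // Builds and deploys custom inference code for a Hugging Face model via CodeBuild.
+    new SageMakerModel(this, 'EmbeddingsModel', {
+      vpc,
+      region: this.region,
+      model: {
+        type: DeploymentType.CustomInference,
+        modelId: 'sentence-transformers/all-MiniLM-L6-v2', // must match the Hugging Face model ID
+        codeFolder: './embeddings-model', // placeholder: local folder containing the inference code
+        container: '<deep-learning-container-image>', // placeholder: pick one from the AWS DLC list above
+        instanceType: 'ml.g5.12xlarge',
+        codeBuildComputeType: codebuild.ComputeType.LARGE, // needs enough storage to download the model
+      },
+    });
+  }
+}
+```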
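+
+Similarly, for the construct-based options listed at the top of this page, a deployment through the [Generative AI CDK Constructs](https://github.com/awslabs/generative-ai-cdk-constructs) library looks roughly like the sketch below; the instance type and container image member names are illustrative and should be checked against the library's documentation:
+
+```typescript
+import * as cdk from 'aws-cdk-lib';
+import {
+  HuggingFaceSageMakerEndpoint,
+  SageMakerInstanceType,
+  DeepLearningContainerImage,
+} from '@cdklabs/generative-ai-cdk-constructs';
+
+class HostedModelStack extends cdk.Stack {
+  constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {
+    super(scope, id, props);
+
+    // Deploys a model supported by the HuggingFace LLM Inference container
+    // to a SageMaker real-time endpoint.
+    new HuggingFaceSageMakerEndpoint(this, 'Mistral7B', {
+      modelId: 'mistralai/Mistral-7B-Instruct-v0.1', // must match a Hugging Face model ID
+      instanceType: SageMakerInstanceType.ML_G5_2XLARGE,
+      // Illustrative member name; pick an image from the library's documentation.
+      container: DeepLearningContainerImage.HUGGINGFACE_PYTORCH_TGI_INFERENCE_2_0_1_TGI1_1_0_GPU_PY39_CU118_UBUNTU20_04,
+    });
+  }
+}
+```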
+
+### Adapters
+
+This sample provides [adapters](https://github.com/aws-samples/aws-genai-llm-chatbot/tree/main/lib/model-interfaces/langchain/functions/request-handler/adapters) for several models out of the box. The model you want to deploy might not have an existing adapter available, in which case you will need to develop one. [This documentation](https://github.com/aws-samples/aws-genai-llm-chatbot/tree/main/lib/model-interfaces/langchain) provides the steps to build your own adapter.
+
+### Precautions
+
+***Cost***: Be mindful of the costs associated with AWS resources, especially with SageMaker models, which are billed by the hour. Leaving serverful resources running for extended periods or deploying numerous LLMs can quickly lead to increased costs.
+
+***Licensing***: These constructs allow you to interact with models from third-party providers. Your use of the third-party generative AI (GAI) models is governed by the terms provided to you by the third-party GAI model providers when you acquired your license to use them (for example, their terms of service, license agreement, acceptable use policy, and privacy policy).
+
+You are responsible for ensuring that your use of the third-party GAI models complies with the terms governing them, and any laws, rules, regulations, policies, or standards that apply to you.
+
+You are also responsible for making your own independent assessment of the third-party GAI models that you use, including their outputs and how third-party GAI model providers use any data that might be transmitted to them based on your deployment configuration. AWS does not make any representations, warranties, or guarantees regarding the third-party GAI models, which are “Third-Party Content” under your agreement with AWS. This construct is offered to you as “AWS Content” under your agreement with AWS.
\ No newline at end of file
diff --git a/lib/sagemaker-model/README.md b/lib/sagemaker-model/README.md
deleted file mode 100644
index 5aa110d4e..000000000
--- a/lib/sagemaker-model/README.md
+++ /dev/null
@@ -1,97 +0,0 @@
-## SageMaker Model Construct
-
-A purpose-built CDK construct, [SageMakerModel](./index.ts), helps facilitate the deployment of models to SageMaker. You can use this layer to deploy:
-- Models from SageMaker Foundation Models
-- Models from SageMaker JumpStart
-- Models supported by the [HuggingFace LLM Inference container](https://huggingface.co/blog/sagemaker-huggingface-llm)
-- Models from HuggingFace with custom inference code
-
-
-# ⚠️ Precautions ⚠️
-
-Before you begin using the sample, there are certain precautions you must take into account:
-
-- **Cost management with self-hosted models**: Be mindful of the costs associated with AWS resources, especially with SageMaker models, which are billed by the hour. While the sample is designed to be cost-effective, leaving serverful resources running for extended periods or deploying numerous LLMs can quickly lead to increased costs.
-
-- **Licensing obligations**: If you choose to use any datasets or models alongside the provided samples, ensure you check the LLM code and comply with all licensing obligations attached to them.
-
-- **This is a sample**: the code provided as part of this repository shouldn't be used for production workloads without further reviews and adaptation.
-
-# Preview Access and Service Quotas
-
-- **Instance type quota increase**
-You might consider requesting an increase in the service quota for specific SageMaker instance types, such as the `ml.g5` instance type. This gives access to the latest generation of GPU/multi-GPU instance types. You can do this from the AWS console.
-
-- **Foundation Models preview access**
-If you are looking to deploy models from SageMaker Foundation Models, you need to request preview access from the AWS console.
-Furthermore, check which regions are currently supported for SageMaker Foundation Models.
-
-
-#### Deploy from SageMaker Foundation/JumpStart Models
-The sample allows you to deploy models from [**Amazon SageMaker Foundation Models**](https://docs.aws.amazon.com/sagemaker/latest/dg/jumpstart-foundation-models-choose.html) by specifying the model ARN. This simplifies the deployment process of these AI models on AWS.
-
-```typescript
-new SageMakerModel(this, 'FoundationModelId', {
-  vpc,
-  region: this.region,
-  model: {
-    type: DeploymentType.ModelPackage,
-    modelId: 'modelId', // e.g. ai21/j2-grande-instruct-v1 - this is an arbitrary ID
-    instanceType: 'instanceType', // e.g. ml.g5.12xlarge
-    packages: (scope) =>
-      new cdk.CfnMapping(scope, 'ModelPackageMapping', {
-        lazy: true,
-        mapping: {
-          'region': { arn: 'container-arn' },
-        },
-      }),
-  },
-});
-```
-
-The `container-arn` of interest can be found in different places:
-
-- For SageMaker Foundation Models, some model cards expose the ARN; otherwise, you need to deploy one manually from the console and copy the `ModelPackage` ARN from `SageMaker -> Models -> Deployed model` in the console.
-
-- For SageMaker JumpStart models, at the moment, you need to deploy the model of interest from SageMaker Studio and then copy the `ModelPackage` ARN from `SageMaker -> Models -> Deployed model` in the console.
-
-
-#### Hugging Face LLM Inference Container
-The solution provides support for all publicly accessible LLMs supported by the [HuggingFace LLM Inference container](https://huggingface.co/blog/sagemaker-huggingface-llm), thereby expanding your model options and letting you leverage a wide variety of pre-trained models available on this platform.
-
-```typescript
-new SageMakerModel(this, 'HFModel', {
-  vpc,
-  region: this.region,
-  model: {
-    type: DeploymentType.Container,
-    modelId: 'modelId', // e.g. tiiuae/falcon-40b-instruct - this must match the HuggingFace model ID
-    container: ContainerImages.HF_PYTORCH_LLM_TGI_INFERENCE_LATEST,
-    instanceType: 'instanceType', // e.g. ml.g5.24xlarge
-    env: {
-      ...
-    },
-  },
-});
-```
-
-#### Models with custom inference
-While the options above are preferred, for broader compatibility, the sample also showcases deployment of any other Hugging Face model not supported by the HuggingFace LLM Inference container, using custom inference code. This process is powered by **AWS CodeBuild**.
-
-For this kind of deployment, you need to choose the right container for your model from this list of [AWS Deep Learning Containers](https://github.com/aws/deep-learning-containers/blob/master/available_images.md), based on the PyTorch/Transformers versions, Python version, and so on.
-
-```typescript
-new SageMakerModel(this, 'ModelId', {
-  vpc,
-  region: this.region,
-  model: {
-    type: DeploymentType.CustomInference,
-    modelId: 'modelId', // e.g. sentence-transformers/all-MiniLM-L6-v2 - this must match the HuggingFace model ID
-    codeFolder: 'localFolder', // see, for example, ./lib/aurora-semantic-search/embeddings-model
-    container: 'container-arn', // one from https://github.com/aws/deep-learning-containers/blob/master/available_images.md
-    instanceType: 'instanceType', // e.g. ml.g5.12xlarge
-    codeBuildComputeType: codebuild.ComputeType.LARGE, // size of the CodeBuild instance; must have enough storage to download the whole model repository from HuggingFace
-  }
-});
-```
-
-An example of how this deployment type is used in this repo can be found [here](../rag-sources/aurora-pgvector/index.ts#L120), where it's used to deploy an embedding model from HuggingFace.
\ No newline at end of file