Skip to content

Commit

Permalink
Release v3.3.0 into Main
Browse files Browse the repository at this point in the history
  • Loading branch information
estohlmann authored Nov 26, 2024
2 parents 7c50bc5 + a9e6453 commit 1f36944
Show file tree
Hide file tree
Showing 76 changed files with 2,389 additions and 451 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cdk.context.json
.venv
.DS_Store
*.iml
*.code-workspace

# Coverage Statistic Folders
coverage
Expand Down
8 changes: 3 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ repos:
name: isort (python)

- repo: https://github.com/ambv/black
rev: '23.10.1'
rev: '24.10.0'
hooks:
- id: black

Expand All @@ -66,21 +66,19 @@ repos:
args: [--exit-non-zero-on-fix]

- repo: https://github.com/pycqa/flake8
rev: '6.1.0'
rev: '7.1.1'
hooks:
- id: flake8
additional_dependencies:
- flake8-docstrings
- flake8-broken-line
- flake8-bugbear
- flake8-comprehensions
- flake8-debugger
- flake8-string-format
args:
- --docstring-convention=numpy
- --max-line-length=120
- --extend-immutable-calls=Query,fastapi.Depends,fastapi.params.Depends
- --ignore=B008 # Ignore error for function calls in argument defaults
- --ignore=B008,E203 # B008: function calls in argument defaults; E203: whitespace before ':' (conflicts with black formatting)
exclude: ^(__init__.py$|.*\/__init__.py$)


Expand Down
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
# v3.3.0
## Key Features
### RAG ETL Pipeline
- This feature introduces a second RAG ingestion capability for LISA customers. Today, customers can manually upload documents via the chatbot user interface directly into a vector store. With this new ingestion pipeline, customers have a flexible, scalable solution for automating the loading of documents into configured vector stores.

## Enhancements
- Implemented a confirmation modal prior to closing the create model wizard, enhancing user control and preventing accidental data loss
- Added functionality allowing users to optionally override auto-generated security groups with custom security groups at deployment time

## Acknowledgements
* @bedanley
* @djhorne-amazon
* @estohlmann
* @dustins

**Full Changelog**: https://github.com/awslabs/LISA/compare/v3.2.1...v3.3.0


# v3.2.1
## Bug Fixes
- Resolved issue where subnet wasn't being passed into ec2 instance creation
Expand All @@ -13,6 +31,7 @@

**Full Changelog**: https://github.com/awslabs/LISA/compare/v3.2.0...v3.2.1


# v3.2.0
## Key Features
### Enhanced Deployment Configuration
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ DEPLOYMENT_STAGE := prod
endif

# ACCOUNT_NUMBERS_ECR - AWS account numbers that need to be logged into with Docker CLI to use ECR
# Only read the list when config-custom.yaml defines a non-empty accountNumbersEcr.
# The check has to go through the shell function: a bare "yq" reference in make
# syntax is a variable lookup that expands to nothing, so the guard would never
# be true and the configured accounts would be silently ignored.
ifneq ($(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '(.accountNumbersEcr // []) | length'), 0)
ACCOUNT_NUMBERS_ECR := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .accountNumbersEcr[])
endif

# Append deployed account number to array for dockerLogin rule
ACCOUNT_NUMBERS_ECR := $(ACCOUNT_NUMBERS_ECR) $(ACCOUNT_NUMBER)
Expand All @@ -101,7 +103,9 @@ ifneq ($(findstring $(DEPLOYMENT_STAGE),$(STACK)),$(DEPLOYMENT_STAGE))
endif

# MODEL_IDS - IDs of models to deploy
# Only read the model names when config-custom.yaml defines a non-empty ecsModels list.
# The check has to go through the shell function: a bare "yq" reference in make
# syntax is a variable lookup that expands to nothing, so the guard would never
# be true and MODEL_IDS would always stay unset.
ifneq ($(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '(.ecsModels // []) | length'), 0)
MODEL_IDS := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.ecsModels[].modelName')
endif

# MODEL_BUCKET - S3 bucket containing model artifacts
MODEL_BUCKET := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.s3BucketModels')
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.2.1
3.3.0
37 changes: 10 additions & 27 deletions ecs_model_deployer/src/lib/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
*/

// Models for schema validation.
import * as cdk from 'aws-cdk-lib';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import { AmiHardwareType } from 'aws-cdk-lib/aws-ecs';
import { z } from 'zod';
import { SecurityGroupConfigSchema } from '../../../lib/schema';

const VERSION: string = '2.0.1';

Expand Down Expand Up @@ -64,19 +64,6 @@ export type RegisteredModel = {
streaming?: boolean;
};

/**
* Custom security groups for application.
*
* @property {ec2.SecurityGroup} ecsModelAlbSg - ECS model application load balancer security group.
* @property {ec2.SecurityGroup} restApiAlbSg - REST API application load balancer security group.
* @property {ec2.SecurityGroup} lambdaSecurityGroup - Lambda security group.
*/
export type SecurityGroups = {
ecsModelAlbSg: ec2.SecurityGroup;
restApiAlbSg: ec2.SecurityGroup;
lambdaSecurityGroup: ec2.SecurityGroup;
};

/**
* Metadata for a specific EC2 instance type.
*
Expand Down Expand Up @@ -336,7 +323,7 @@ const ImageRegistryAsset = z.object({
*
* @property {string} baseImage - Base image for the container.
* @property {Record<string, string>} [environment={}] - Environment variables for the container.
* @property {ContainerHealthCheckConfig} [healthCheckConfig={}] - Health check configuration for the container.
* @property {ContainerHealthCheckConfigSchema} [healthCheckConfig={}] - Health check configuration for the container.
* @property {number} [sharedMemorySize=0] - The value for the size of the /dev/shm volume.
*/
const ContainerConfigSchema = z.object({
Expand Down Expand Up @@ -380,7 +367,7 @@ const HealthCheckConfigSchema = z.object({
* Configuration schema for the load balancer.
*
* @property {string} [sslCertIamArn=null] - SSL certificate IAM ARN for load balancer.
* @property {HealthCheckConfig} healthCheckConfig - Health check configuration for the load balancer.
* @property {HealthCheckConfigSchema} healthCheckConfig - Health check configuration for the load balancer.
* @property {string} domainName - Domain name to use instead of the load balancer's default DNS name.
*/
const LoadBalancerConfigSchema = z.object({
Expand Down Expand Up @@ -414,7 +401,7 @@ const MetricConfigSchema = z.object({
* @property {number} [cooldown=420] - Cool down period in seconds between scaling activities.
* @property {number} [defaultInstanceWarmup=180] - Default warm-up time in seconds until a newly launched instance can
send metrics to CloudWatch.
* @property {MetricConfig} metricConfig - Metric configuration for auto scaling.
* @property {MetricConfigSchema} metricConfig - Metric configuration for auto scaling.
*/
const AutoScalingConfigSchema = z.object({
blockDeviceVolumeSize: z.number().min(30).default(30),
Expand All @@ -432,7 +419,7 @@ const AutoScalingConfigSchema = z.object({
* @property {AutoScalingConfigSchema} autoScalingConfig - Configuration for auto scaling settings.
* @property {Record<string,string>} buildArgs - Optional build args to be applied when creating the
* task container if containerConfig.image.type is ASSET
* @property {ContainerConfig} containerConfig - Configuration for the container.
* @property {ContainerConfigSchema} containerConfig - Configuration for the container.
* @property {number} [containerMemoryBuffer=2048] - This is the amount of memory to buffer (or subtract off)
* from the total instance memory, if we don't include this,
* the container can have a hard time finding available RAM
Expand All @@ -441,7 +428,7 @@ const AutoScalingConfigSchema = z.object({
* @property {identifier} modelType - Unique identifier for the cluster which will be used when naming resources
* @property {string} instanceType - EC2 instance type for running the model.
* @property {boolean} [internetFacing=false] - Whether or not the cluster will be configured as internet facing
* @property {LoadBalancerConfig} loadBalancerConfig - Configuration for load balancer settings.
* @property {LoadBalancerConfigSchema} loadBalancerConfig - Configuration for load balancer settings.
*/
const EcsBaseConfigSchema = z.object({
amiHardwareType: z.nativeEnum(AmiHardwareType),
Expand Down Expand Up @@ -477,9 +464,9 @@ export type ECSConfig = EcsBaseConfig;
* @property {string} modelType - Type of model.
* @property {string} instanceType - EC2 instance type for running the model.
* @property {string} inferenceContainer - Prebuilt inference container for serving model.
* @property {ContainerConfig} containerConfig - Configuration for the container.
* @property {ContainerConfigSchema} containerConfig - Configuration for the container.
* @property {AutoScalingConfigSchema} autoScalingConfig - Configuration for auto scaling settings.
* @property {LoadBalancerConfig} loadBalancerConfig - Configuration for load balancer settings.
* @property {LoadBalancerConfigSchema} loadBalancerConfig - Configuration for load balancer settings.
* @property {string} [localModelCode='/opt/model-code'] - Path in container for local model code.
* @property {string} [modelHosting='ecs'] - Model hosting.
*/
Expand Down Expand Up @@ -562,19 +549,14 @@ const PypiConfigSchema = z.object({
* @property {string} deploymentStage - Deployment stage for the application.
* @property {string} removalPolicy - Removal policy for resources (destroy or retain).
* @property {boolean} [runCdkNag=false] - Whether to run CDK Nag checks.
* @property {string} [lambdaSourcePath='./lambda'] - Path to Lambda source code dir.
* @property {string} s3BucketModels - S3 bucket for models.
* @property {string} mountS3DebUrl - URL for S3-mounted Debian package.
* @property {string[]} [accountNumbersEcr=null] - List of AWS account numbers for ECR repositories.
* @property {boolean} [deployRag=false] - Whether to deploy RAG stacks.
* @property {boolean} [deployChat=true] - Whether to deploy chat stacks.
* @property {boolean} [deployUi=true] - Whether to deploy UI stacks.
* @property {string} logLevel - Log level for application.
* @property {AuthConfigSchema} authConfig - Authorization configuration.
* @property {RagRepositoryConfigSchema} ragRepositoryConfig - Rag Repository configuration.
* @property {RagFileProcessingConfigSchema} ragFileProcessingConfig - Rag file processing configuration.
* @property {EcsModelConfigSchema[]} ecsModels - Array of ECS model configurations.
* @property {ApiGatewayConfigSchema} apiGatewayConfig - API Gateway Endpoint configuration.
* @property {string} [nvmeHostMountPath='/nvme'] - Host path for NVMe drives.
* @property {string} [nvmeContainerMountPath='/nvme'] - Container path for NVMe drives.
* @property {Array<{ Key: string, Value: string }>} [tags=null] - Array of key-value pairs for tagging.
Expand All @@ -591,6 +573,7 @@ const RawConfigSchema = z
vpcId: z.string().optional(),
deploymentStage: z.string(),
removalPolicy: z.union([z.literal('destroy'), z.literal('retain')]).transform((value) => REMOVAL_POLICIES[value]),
securityGroupConfig: SecurityGroupConfigSchema.optional(),
s3BucketModels: z.string(),
mountS3DebUrl: z.string().optional(),
pypiConfig: PypiConfigSchema.optional().default({
Expand Down
7 changes: 7 additions & 0 deletions example_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ s3BucketModels: hf-models-gaiic
# subnets:
# - subnetId:
# ipv4CidrBlock:
# securityGroupConfig: # If securityGroupConfig is provided, all required security groups must be overridden. The vector store SGs (openSearchSecurityGroupId, pgVectorSecurityGroupId) are optional, depending on deployment preferences.
# modelSecurityGroupId: sg-0123456789abcdef
# restAlbSecurityGroupId: sg-0123456789abcdef
# lambdaSecurityGroupId: sg-0123456789abcdef
# liteLlmDbSecurityGroupId: sg-0123456789abcdef
# openSearchSecurityGroupId: sg-0123456789abcdef #Optional
# pgVectorSecurityGroupId: sg-0123456789abcdef #Optional
# The following configuration will allow for using a custom domain for the chat user interface.
# If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL.
# Users must use the custom domain for the user interface to work if this option is populated.
Expand Down
13 changes: 13 additions & 0 deletions lambda/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
31 changes: 20 additions & 11 deletions lambda/dockerimagebuilder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,16 @@ def handler(event: Dict[str, Any], context) -> Dict[str, Any]: # type: ignore [
rendered_userdata = rendered_userdata.replace("{{IMAGE_ID}}", image_tag)

try:
instances = ec2_resource.create_instances(
ImageId=ami_id,
SubnetId=os.environ["LISA_SUBNET_ID"],
MinCount=1,
MaxCount=1,
InstanceType="m5.large",
UserData=rendered_userdata,
IamInstanceProfile={"Arn": os.environ["LISA_INSTANCE_PROFILE"]},
BlockDeviceMappings=[{"DeviceName": "/dev/xvda", "Ebs": {"VolumeSize": 32}}],
TagSpecifications=[
# Define common parameters
instance_params = {
"ImageId": ami_id,
"MinCount": 1,
"MaxCount": 1,
"InstanceType": "m5.large",
"UserData": rendered_userdata,
"IamInstanceProfile": {"Arn": os.environ["LISA_INSTANCE_PROFILE"]},
"BlockDeviceMappings": [{"DeviceName": "/dev/xvda", "Ebs": {"VolumeSize": 32}}],
"TagSpecifications": [
{
"ResourceType": "instance",
"Tags": [
Expand All @@ -86,7 +86,16 @@ def handler(event: Dict[str, Any], context) -> Dict[str, Any]: # type: ignore [
],
}
],
)
}

# Add SubnetId if specified in environment
if "LISA_SUBNET_ID" in os.environ:
instance_params["SubnetId"] = os.environ["LISA_SUBNET_ID"]

# Create instance with parameters
instances = ec2_resource.create_instances(**instance_params)

return {"instance_id": instances[0].instance_id, "image_tag": image_tag}

except ClientError as e:
raise e
12 changes: 6 additions & 6 deletions lambda/models/domain_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class AutoScalingConfig(BaseModel):
defaultInstanceWarmup: PositiveInt
metricConfig: MetricConfig

@model_validator(mode="after") # type: ignore
@model_validator(mode="after")
def validate_auto_scaling_config(self) -> Self:
"""Validate autoScalingConfig values."""
if self.minCapacity > self.maxCapacity:
Expand All @@ -115,7 +115,7 @@ class AutoScalingInstanceConfig(BaseModel):
maxCapacity: Optional[PositiveInt] = None
desiredCapacity: Optional[PositiveInt] = None

@model_validator(mode="after") # type: ignore
@model_validator(mode="after")
def validate_auto_scaling_instance_config(self) -> Self:
"""Validate autoScalingInstanceConfig values."""
config_fields = [self.minCapacity, self.maxCapacity, self.desiredCapacity]
Expand Down Expand Up @@ -155,7 +155,7 @@ class ContainerConfig(BaseModel):
healthCheckConfig: ContainerHealthCheckConfig
environment: Optional[Dict[str, str]] = {}

@field_validator("environment") # type: ignore
@field_validator("environment")
@classmethod
def validate_environment(cls, environment: Dict[str, str]) -> Dict[str, str]:
"""Validate that all keys in Dict are not empty."""
Expand Down Expand Up @@ -201,7 +201,7 @@ class CreateModelRequest(BaseModel):
modelUrl: Optional[str] = None
streaming: Optional[bool] = False

@model_validator(mode="after") # type: ignore
@model_validator(mode="after")
def validate_create_model_request(self) -> Self:
"""Validate whole request object."""
# Validate that an embedding model cannot be set as streaming-enabled
Expand Down Expand Up @@ -252,7 +252,7 @@ class UpdateModelRequest(BaseModel):
modelType: Optional[ModelType] = None
streaming: Optional[bool] = None

@model_validator(mode="after") # type: ignore
@model_validator(mode="after")
def validate_update_model_request(self) -> Self:
"""Validate whole request object."""
fields = [
Expand All @@ -273,7 +273,7 @@ def validate_update_model_request(self) -> Self:
raise ValueError("Embedding model cannot be set with streaming enabled.")
return self

@field_validator("autoScalingInstanceConfig") # type: ignore
@field_validator("autoScalingInstanceConfig")
@classmethod
def validate_autoscaling_instance_config(cls, config: AutoScalingInstanceConfig) -> AutoScalingInstanceConfig:
"""Validate that the AutoScaling instance config has at least one positive value."""
Expand Down
Loading

0 comments on commit 1f36944

Please sign in to comment.