Release v3.3.0 into Main

awslabs · Nov 26, 2024 · 1f36944 · 1f36944
2 parents 7c50bc5 + a9e6453
commit 1f36944
Show file tree

Hide file tree

Showing 76 changed files with 2,389 additions and 451 deletions.
diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,7 @@ cdk.context.json
 .venv
 .DS_Store
 *.iml
+*.code-workspace
 
 # Coverage Statistic Folders
 coverage

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -55,7 +55,7 @@ repos:
       name: isort (python)
 
 - repo: https://github.com/ambv/black
-  rev: '23.10.1'
+  rev: '24.10.0'
   hooks:
     - id: black
 
@@ -66,21 +66,19 @@ repos:
       args: [--exit-non-zero-on-fix]
 
 - repo: https://github.com/pycqa/flake8
-  rev: '6.1.0'
+  rev: '7.1.1'
   hooks:
     - id: flake8
       additional_dependencies:
-        - flake8-docstrings
         - flake8-broken-line
         - flake8-bugbear
         - flake8-comprehensions
         - flake8-debugger
         - flake8-string-format
       args:
-        - --docstring-convention=numpy
         - --max-line-length=120
         - --extend-immutable-calls=Query,fastapi.Depends,fastapi.params.Depends
-        - --ignore=B008 # Ignore error for function calls in argument defaults
+        - --ignore=B008,E203 # B008: function calls in argument defaults; E203: whitespace before ':' (conflicts with black's slice formatting)
       exclude: ^(__init__.py$|.*\/__init__.py$)
 
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,21 @@
+# v3.3.0
+## Key Features
+### RAG ETL Pipeline
+- This feature introduces a second RAG ingestion capability for LISA customers. Today, customers can manually upload documents via the chatbot user interface directly into a vector store. With this new ingestion pipeline, customers have a flexible, scalable solution for automating the loading of documents into configured vector stores.
+
+## Enhancements
+- Implemented a confirmation modal prior to closing the create model wizard, enhancing user control and preventing accidental data loss
+- Added functionality allowing users to optionally override auto-generated security groups with custom security groups at deployment time
+
+## Acknowledgements
+* @bedanley
+* @djhorne-amazon
+* @estohlmann
+* @dustins
+
+**Full Changelog**: https://github.com/awslabs/LISA/compare/v3.2.1...v3.3.0
+
+
 # v3.2.1
 ## Bug Fixes
 - Resolved issue where subnet wasn't being passed into ec2 instance creation
@@ -13,6 +31,7 @@
 
 **Full Changelog**: https://github.com/awslabs/LISA/compare/v3.2.0...v3.2.1
 
+
 # v3.2.0
 ## Key Features
 ### Enhanced Deployment Configuration

diff --git a/Makefile b/Makefile
@@ -86,7 +86,9 @@ DEPLOYMENT_STAGE := prod
 endif
 
 # ACCOUNT_NUMBERS_ECR - AWS account numbers that need to be logged into with Docker CLI to use ECR
+ifneq ($(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.accountNumbersEcr'), null) # yq prints null when the key is absent; only read the list when it is configured
 ACCOUNT_NUMBERS_ECR := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .accountNumbersEcr[])
+endif
 
 # Append deployed account number to array for dockerLogin rule
 ACCOUNT_NUMBERS_ECR := $(ACCOUNT_NUMBERS_ECR) $(ACCOUNT_NUMBER)
@@ -101,7 +103,9 @@ ifneq ($(findstring $(DEPLOYMENT_STAGE),$(STACK)),$(DEPLOYMENT_STAGE))
 endif
 
 # MODEL_IDS - IDs of models to deploy
+ifneq ($(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.ecsModels'), null) # yq prints null when the key is absent; only read model names when ecsModels is configured
 MODEL_IDS := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.ecsModels[].modelName')
+endif
 
 # MODEL_BUCKET - S3 bucket containing model artifacts
 MODEL_BUCKET := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.s3BucketModels')

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-3.2.1
+3.3.0
diff --git a/ecs_model_deployer/src/lib/schema.ts b/ecs_model_deployer/src/lib/schema.ts
@@ -12,13 +12,13 @@
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-*/
+ */
 
 // Models for schema validation.
 import * as cdk from 'aws-cdk-lib';
-import * as ec2 from 'aws-cdk-lib/aws-ec2';
 import { AmiHardwareType } from 'aws-cdk-lib/aws-ecs';
 import { z } from 'zod';
+import { SecurityGroupConfigSchema } from '../../../lib/schema';
 
 const VERSION: string = '2.0.1';
 
@@ -64,19 +64,6 @@ export type RegisteredModel = {
     streaming?: boolean;
 };
 
-/**
- * Custom security groups for application.
- *
- * @property {ec2.SecurityGroup} ecsModelAlbSg - ECS model application load balancer security group.
- * @property {ec2.SecurityGroup} restApiAlbSg - REST API application load balancer security group.
- * @property {ec2.SecurityGroup} lambdaSecurityGroup - Lambda security group.
- */
-export type SecurityGroups = {
-    ecsModelAlbSg: ec2.SecurityGroup;
-    restApiAlbSg: ec2.SecurityGroup;
-    lambdaSecurityGroup: ec2.SecurityGroup;
-};
-
 /**
  * Metadata for a specific EC2 instance type.
  *
@@ -336,7 +323,7 @@ const ImageRegistryAsset = z.object({
  *
  * @property {string} baseImage - Base image for the container.
  * @property {Record<string, string>} [environment={}] - Environment variables for the container.
- * @property {ContainerHealthCheckConfig} [healthCheckConfig={}] - Health check configuration for the container.
+ * @property {ContainerHealthCheckConfigSchema} [healthCheckConfig={}] - Health check configuration for the container.
  * @property {number} [sharedMemorySize=0] - The value for the size of the /dev/shm volume.
  */
 const ContainerConfigSchema = z.object({
@@ -380,7 +367,7 @@ const HealthCheckConfigSchema = z.object({
  * Configuration schema for the load balancer.
  *
  * @property {string} [sslCertIamArn=null] - SSL certificate IAM ARN for load balancer.
- * @property {HealthCheckConfig} healthCheckConfig - Health check configuration for the load balancer.
+ * @property {HealthCheckConfigSchema} healthCheckConfig - Health check configuration for the load balancer.
  * @property {string} domainName - Domain name to use instead of the load balancer's default DNS name.
  */
 const LoadBalancerConfigSchema = z.object({
@@ -414,7 +401,7 @@ const MetricConfigSchema = z.object({
 * @property {number} [cooldown=420] - Cool down period in seconds between scaling activities.
 * @property {number} [defaultInstanceWarmup=180] - Default warm-up time in seconds until a newly launched instance can
                                                    send metrics to CloudWatch.
-* @property {MetricConfig} metricConfig - Metric configuration for auto scaling.
+ * @property {MetricConfigSchema} metricConfig - Metric configuration for auto scaling.
 */
 const AutoScalingConfigSchema = z.object({
     blockDeviceVolumeSize: z.number().min(30).default(30),
@@ -432,7 +419,7 @@ const AutoScalingConfigSchema = z.object({
  * @property {AutoScalingConfigSchema} autoScalingConfig - Configuration for auto scaling settings.
  * @property {Record<string,string>} buildArgs - Optional build args to be applied when creating the
  *                                              task container if containerConfig.image.type is ASSET
- * @property {ContainerConfig} containerConfig - Configuration for the container.
+ * @property {ContainerConfigSchema} containerConfig - Configuration for the container.
  * @property {number} [containerMemoryBuffer=2048] - This is the amount of memory to buffer (or subtract off)
  *                                                from the total instance memory, if we don't include this,
  *                                                the container can have a hard time finding available RAM
@@ -441,7 +428,7 @@ const AutoScalingConfigSchema = z.object({
  * @property {identifier} modelType - Unique identifier for the cluster which will be used when naming resources
  * @property {string} instanceType - EC2 instance type for running the model.
  * @property {boolean} [internetFacing=false] - Whether or not the cluster will be configured as internet facing
- * @property {LoadBalancerConfig} loadBalancerConfig - Configuration for load balancer settings.
+ * @property {LoadBalancerConfigSchema} loadBalancerConfig - Configuration for load balancer settings.
  */
 const EcsBaseConfigSchema = z.object({
     amiHardwareType: z.nativeEnum(AmiHardwareType),
@@ -477,9 +464,9 @@ export type ECSConfig = EcsBaseConfig;
  * @property {string} modelType - Type of model.
  * @property {string} instanceType - EC2 instance type for running the model.
  * @property {string} inferenceContainer - Prebuilt inference container for serving model.
- * @property {ContainerConfig} containerConfig - Configuration for the container.
+ * @property {ContainerConfigSchema} containerConfig - Configuration for the container.
  * @property {AutoScalingConfigSchema} autoScalingConfig - Configuration for auto scaling settings.
- * @property {LoadBalancerConfig} loadBalancerConfig - Configuration for load balancer settings.
+ * @property {LoadBalancerConfigSchema} loadBalancerConfig - Configuration for load balancer settings.
  * @property {string} [localModelCode='/opt/model-code'] - Path in container for local model code.
  * @property {string} [modelHosting='ecs'] - Model hosting.
  */
@@ -562,19 +549,14 @@ const PypiConfigSchema = z.object({
  * @property {string} deploymentStage - Deployment stage for the application.
  * @property {string} removalPolicy - Removal policy for resources (destroy or retain).
  * @property {boolean} [runCdkNag=false] - Whether to run CDK Nag checks.
- * @property {string} [lambdaSourcePath='./lambda'] - Path to Lambda source code dir.
  * @property {string} s3BucketModels - S3 bucket for models.
  * @property {string} mountS3DebUrl - URL for S3-mounted Debian package.
  * @property {string[]} [accountNumbersEcr=null] - List of AWS account numbers for ECR repositories.
  * @property {boolean} [deployRag=false] - Whether to deploy RAG stacks.
  * @property {boolean} [deployChat=true] - Whether to deploy chat stacks.
  * @property {boolean} [deployUi=true] - Whether to deploy UI stacks.
  * @property {string} logLevel - Log level for application.
- * @property {AuthConfigSchema} authConfig - Authorization configuration.
- * @property {RagRepositoryConfigSchema} ragRepositoryConfig - Rag Repository configuration.
- * @property {RagFileProcessingConfigSchema} ragFileProcessingConfig - Rag file processing configuration.
  * @property {EcsModelConfigSchema[]} ecsModels - Array of ECS model configurations.
- * @property {ApiGatewayConfigSchema} apiGatewayConfig - API Gateway Endpoint configuration.
  * @property {string} [nvmeHostMountPath='/nvme'] - Host path for NVMe drives.
  * @property {string} [nvmeContainerMountPath='/nvme'] - Container path for NVMe drives.
  * @property {Array<{ Key: string, Value: string }>} [tags=null] - Array of key-value pairs for tagging.
@@ -591,6 +573,7 @@ const RawConfigSchema = z
         vpcId: z.string().optional(),
         deploymentStage: z.string(),
         removalPolicy: z.union([z.literal('destroy'), z.literal('retain')]).transform((value) => REMOVAL_POLICIES[value]),
+        securityGroupConfig: SecurityGroupConfigSchema.optional(),
         s3BucketModels: z.string(),
         mountS3DebUrl: z.string().optional(),
         pypiConfig: PypiConfigSchema.optional().default({

diff --git a/example_config.yaml b/example_config.yaml
@@ -13,6 +13,13 @@ s3BucketModels: hf-models-gaiic
 # subnets:
 #  - subnetId:
 #    ipv4CidrBlock:
+# securityGroupConfig: # If securityGroupConfig is provided, all security groups must be overridden. Vector store SGs are optional, depending on which vector stores are deployed.
+#   modelSecurityGroupId: sg-0123456789abcdef
+#   restAlbSecurityGroupId: sg-0123456789abcdef
+#   lambdaSecurityGroupId: sg-0123456789abcdef
+#   liteLlmDbSecurityGroupId: sg-0123456789abcdef
+#   openSearchSecurityGroupId: sg-0123456789abcdef #Optional
+#   pgVectorSecurityGroupId: sg-0123456789abcdef  #Optional
 # The following configuration will allow for using a custom domain for the chat user interface.
 # If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL.
 # Users must use the custom domain for the user interface to work if this option is populated.

diff --git a/lambda/__init__.py b/lambda/__init__.py
@@ -0,0 +1,13 @@
+#   Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+#   Licensed under the Apache License, Version 2.0 (the "License").
+#   You may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
diff --git a/lambda/dockerimagebuilder/__init__.py b/lambda/dockerimagebuilder/__init__.py
@@ -68,16 +68,16 @@ def handler(event: Dict[str, Any], context) -> Dict[str, Any]:  # type: ignore [
     rendered_userdata = rendered_userdata.replace("{{IMAGE_ID}}", image_tag)
 
     try:
-        instances = ec2_resource.create_instances(
-            ImageId=ami_id,
-            SubnetId=os.environ["LISA_SUBNET_ID"],
-            MinCount=1,
-            MaxCount=1,
-            InstanceType="m5.large",
-            UserData=rendered_userdata,
-            IamInstanceProfile={"Arn": os.environ["LISA_INSTANCE_PROFILE"]},
-            BlockDeviceMappings=[{"DeviceName": "/dev/xvda", "Ebs": {"VolumeSize": 32}}],
-            TagSpecifications=[
+        # Define common parameters
+        instance_params = {
+            "ImageId": ami_id,
+            "MinCount": 1,
+            "MaxCount": 1,
+            "InstanceType": "m5.large",
+            "UserData": rendered_userdata,
+            "IamInstanceProfile": {"Arn": os.environ["LISA_INSTANCE_PROFILE"]},
+            "BlockDeviceMappings": [{"DeviceName": "/dev/xvda", "Ebs": {"VolumeSize": 32}}],
+            "TagSpecifications": [
                 {
                     "ResourceType": "instance",
                     "Tags": [
@@ -86,7 +86,16 @@ def handler(event: Dict[str, Any], context) -> Dict[str, Any]:  # type: ignore [
                     ],
                 }
             ],
-        )
+        }
+
+        # Add SubnetId if specified in environment
+        if "LISA_SUBNET_ID" in os.environ:
+            instance_params["SubnetId"] = os.environ["LISA_SUBNET_ID"]
+
+        # Create instance with parameters
+        instances = ec2_resource.create_instances(**instance_params)
+
         return {"instance_id": instances[0].instance_id, "image_tag": image_tag}
+
     except ClientError as e:
         raise e
diff --git a/lambda/models/domain_objects.py b/lambda/models/domain_objects.py
@@ -98,7 +98,7 @@ class AutoScalingConfig(BaseModel):
     defaultInstanceWarmup: PositiveInt
     metricConfig: MetricConfig
 
-    @model_validator(mode="after")  # type: ignore
+    @model_validator(mode="after")
     def validate_auto_scaling_config(self) -> Self:
         """Validate autoScalingConfig values."""
         if self.minCapacity > self.maxCapacity:
@@ -115,7 +115,7 @@ class AutoScalingInstanceConfig(BaseModel):
     maxCapacity: Optional[PositiveInt] = None
     desiredCapacity: Optional[PositiveInt] = None
 
-    @model_validator(mode="after")  # type: ignore
+    @model_validator(mode="after")
     def validate_auto_scaling_instance_config(self) -> Self:
         """Validate autoScalingInstanceConfig values."""
         config_fields = [self.minCapacity, self.maxCapacity, self.desiredCapacity]
@@ -155,7 +155,7 @@ class ContainerConfig(BaseModel):
     healthCheckConfig: ContainerHealthCheckConfig
     environment: Optional[Dict[str, str]] = {}
 
-    @field_validator("environment")  # type: ignore
+    @field_validator("environment")
     @classmethod
     def validate_environment(cls, environment: Dict[str, str]) -> Dict[str, str]:
         """Validate that all keys in Dict are not empty."""
@@ -201,7 +201,7 @@ class CreateModelRequest(BaseModel):
     modelUrl: Optional[str] = None
     streaming: Optional[bool] = False
 
-    @model_validator(mode="after")  # type: ignore
+    @model_validator(mode="after")
     def validate_create_model_request(self) -> Self:
         """Validate whole request object."""
         # Validate that an embedding model cannot be set as streaming-enabled
@@ -252,7 +252,7 @@ class UpdateModelRequest(BaseModel):
     modelType: Optional[ModelType] = None
     streaming: Optional[bool] = None
 
-    @model_validator(mode="after")  # type: ignore
+    @model_validator(mode="after")
     def validate_update_model_request(self) -> Self:
         """Validate whole request object."""
         fields = [
@@ -273,7 +273,7 @@ def validate_update_model_request(self) -> Self:
             raise ValueError("Embedding model cannot be set with streaming enabled.")
         return self
 
-    @field_validator("autoScalingInstanceConfig")  # type: ignore
+    @field_validator("autoScalingInstanceConfig")
     @classmethod
     def validate_autoscaling_instance_config(cls, config: AutoScalingInstanceConfig) -> AutoScalingInstanceConfig:
         """Validate that the AutoScaling instance config has at least one positive value."""