From 7613ced927ad39f930812c65fdc49445621f5917 Mon Sep 17 00:00:00 2001 From: Evan Stohlmann Date: Mon, 2 Dec 2024 13:37:32 -0700 Subject: [PATCH] partition and litellm updates --- Makefile | 4 +-- lib/rag/state_machine/ingest-pipeline.ts | 13 +++++----- lib/schema.ts | 4 +++ .../src/utils/generate_litellm_config.py | 25 +++++++++++-------- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index d90a6671..6b982bb0 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ DEPLOYMENT_STAGE := prod endif # ACCOUNT_NUMBERS_ECR - AWS account numbers that need to be logged into with Docker CLI to use ECR -ifneq ($(yq '.accountNumbersEcr'), ) +ifneq ($(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.accountNumbersEcr'), null) ACCOUNT_NUMBERS_ECR := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .accountNumbersEcr[]) endif @@ -103,7 +103,7 @@ ifneq ($(findstring $(DEPLOYMENT_STAGE),$(STACK)),$(DEPLOYMENT_STAGE)) endif # MODEL_IDS - IDs of models to deploy -ifneq ($(yq '.ecsModels'), ) +ifneq ($(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.ecsModels'), null) MODEL_IDS := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.ecsModels[].modelName') endif diff --git a/lib/rag/state_machine/ingest-pipeline.ts b/lib/rag/state_machine/ingest-pipeline.ts index 6f231cd7..c3b76717 100644 --- a/lib/rag/state_machine/ingest-pipeline.ts +++ b/lib/rag/state_machine/ingest-pipeline.ts @@ -37,6 +37,7 @@ import { Rule, Schedule, EventPattern, RuleTargetInput, EventField } from 'aws-c import { SfnStateMachine } from 'aws-cdk-lib/aws-events-targets'; import { RagRepositoryType } from '../../schema'; import * as kms from 'aws-cdk-lib/aws-kms'; +import * as cdk from 'aws-cdk-lib'; type PipelineConfig = { chunkOverlap: number; @@ -111,8 +112,8 @@ export class IngestPipelineStateMachine extends Construct { effect: Effect.ALLOW, actions: ['s3:GetObject', 's3:ListBucket'], resources: [ - `arn:aws:s3:::${pipelineConfig.s3Bucket}`, - `arn:aws:s3:::${pipelineConfig.s3Bucket}/*` + `arn:${cdk.Aws.PARTITION}:s3:::${pipelineConfig.s3Bucket}`, + `arn:${cdk.Aws.PARTITION}:s3:::${pipelineConfig.s3Bucket}/*` ] }); @@ -176,10 +177,10 @@ export class IngestPipelineStateMachine extends Construct { effect: Effect.ALLOW, actions: ['ssm:GetParameter'], resources: [ - `arn:aws:ssm:${process.env.CDK_DEFAULT_REGION}:${process.env.CDK_DEFAULT_ACCOUNT}:parameter${config.deploymentPrefix}/LisaServeRagPGVectorConnectionInfo`, - `arn:aws:ssm:${process.env.CDK_DEFAULT_REGION}:${process.env.CDK_DEFAULT_ACCOUNT}:parameter${config.deploymentPrefix}/lisaServeRagRepositoryEndpoint`, - `arn:aws:ssm:${process.env.CDK_DEFAULT_REGION}:${process.env.CDK_DEFAULT_ACCOUNT}:parameter${config.deploymentPrefix}/lisaServeRestApiUri`, - `arn:aws:ssm:${process.env.CDK_DEFAULT_REGION}:${process.env.CDK_DEFAULT_ACCOUNT}:parameter${config.deploymentPrefix}/managementKeySecretName` + `arn:${cdk.Aws.PARTITION}:ssm:${cdk.Aws.REGION}:${cdk.Aws.ACCOUNT_ID}:parameter${config.deploymentPrefix}/LisaServeRagPGVectorConnectionInfo`, + `arn:${cdk.Aws.PARTITION}:ssm:${cdk.Aws.REGION}:${cdk.Aws.ACCOUNT_ID}:parameter${config.deploymentPrefix}/lisaServeRagRepositoryEndpoint`, + `arn:${cdk.Aws.PARTITION}:ssm:${cdk.Aws.REGION}:${cdk.Aws.ACCOUNT_ID}:parameter${config.deploymentPrefix}/lisaServeRestApiUri`, + `arn:${cdk.Aws.PARTITION}:ssm:${cdk.Aws.REGION}:${cdk.Aws.ACCOUNT_ID}:parameter${config.deploymentPrefix}/managementKeySecretName` ] }), new PolicyStatement({ diff --git a/lib/schema.ts b/lib/schema.ts index 9cc01bbd..9b533802 100644 --- a/lib/schema.ts +++ b/lib/schema.ts @@ -537,6 +537,10 @@ const LiteLLMConfig = z.object({ 'Key string must be defined for model management operations, and it must start with "sk-".' + 'This can be any string, and a random UUID is recommended. Example: sk-f132c7cc-059c-481b-b5ca-a42e191672aa', ), + general_settings: z.any().optional(), + litellm_settings: z.any().optional(), + router_settings: z.any().optional(), + environment_variables: z.any().optional() }) .describe('Core LiteLLM configuration - see https://litellm.vercel.app/docs/proxy/configs#all-settings for more details about each field.'); diff --git a/lib/serve/rest-api/src/utils/generate_litellm_config.py b/lib/serve/rest-api/src/utils/generate_litellm_config.py index 9ced3150..5bac7f48 100644 --- a/lib/serve/rest-api/src/utils/generate_litellm_config.py +++ b/lib/serve/rest-api/src/utils/generate_litellm_config.py @@ -50,10 +50,14 @@ def generate_config(filepath: str) -> None: config_models = [] # ensure config_models is a list and not None config_models.extend(litellm_model_params) config_contents["model_list"] = config_models - config_contents["litellm_settings"] = { - "drop_params": True, # drop unrecognized param instead of failing the request on it - "request_timeout": 600, - } + if "litellm_settings" not in config_contents: + config_contents["litellm_settings"] = {} + config_contents["litellm_settings"].update( + { + "drop_params": True, # drop unrecognized param instead of failing the request on it + "request_timeout": 600, + } + ) # Get database connection info db_param_response = ssm_client.get_parameter(Name=os.environ["LITELLM_DB_INFO_PS_NAME"]) @@ -65,13 +69,14 @@ def generate_config(filepath: str) -> None: f"/{db_params['dbName']}" ) - config_contents.update( + if "general_settings" not in config_contents: + config_contents["general_settings"] = {} + + config_contents["general_settings"].update( { - "general_settings": { - "store_model_in_db": True, - "database_url": connection_str, - "master_key": config_contents["db_key"], - } + "store_model_in_db": True, + "database_url": connection_str, + "master_key": config_contents["db_key"], } )