Commit 19c5599
Version 1.3.2
harshit-jain-git committed May 24, 2024
1 parent 59b4176
Showing 481 changed files with 134,152 additions and 60,688 deletions.
2 changes: 1 addition & 1 deletion abacusai/__init__.py
@@ -4,4 +4,4 @@
from .streaming_client import StreamingClient


__version__ = "1.2.5"
__version__ = "1.3.2"
2 changes: 1 addition & 1 deletion abacusai/api_class/ai_agents.py
@@ -28,7 +28,7 @@ class WorkflowNodeInputSchema(ApiClass):
A react-jsonschema-form conformant schema for workflow node input.
Args:
-json_schema (dict): The json schema for the input conformant to react-jsonschema-form specification. Must define keys like "title", "type" and "properties".
+json_schema (dict): The json schema for the input conformant to react-jsonschema-form specification. Must define keys like "title", "type" and "properties". Supported elements - Checkbox, Radio Button, Dropdown, Textarea, Number, Date, File Upload. Not supported - Nested elements, arrays and other complex types.
ui_schema (dict): The ui schema for the input conformant to react-jsonschema-form specification.
"""
json_schema: dict
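For context, a minimal sketch of an input schema that stays within the supported elements listed above; the field names, titles and widget choice are illustrative, not part of this commit:

from abacusai.api_class.ai_agents import WorkflowNodeInputSchema

# Illustrative json_schema limited to supported element types:
# checkbox (boolean), dropdown (enum), textarea (string), number and date.
input_schema = WorkflowNodeInputSchema(
    json_schema={
        "title": "Ticket Triage Input",
        "type": "object",
        "properties": {
            "urgent": {"type": "boolean", "title": "Urgent"},
            "queue": {"type": "string", "title": "Queue", "enum": ["billing", "support", "sales"]},
            "details": {"type": "string", "title": "Details"},
            "priority": {"type": "integer", "title": "Priority"},
            "due_date": {"type": "string", "format": "date", "title": "Due Date"},
        },
    },
    ui_schema={"details": {"ui:widget": "textarea"}},
)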
43 changes: 43 additions & 0 deletions abacusai/api_class/connectors.py
@@ -0,0 +1,43 @@
import dataclasses

from . import enums
from .abstract import _ApiClassFactory
from .dataset import DatasetConfig


@dataclasses.dataclass
class StreamingConnectorDatasetConfig(DatasetConfig):
"""
An abstract class for dataset configs specific to streaming connectors.
Args:
streaming_connector_type (StreamingConnectorType): The type of streaming connector
"""
streaming_connector_type: enums.StreamingConnectorType = dataclasses.field(default=None, repr=False, init=False)

@classmethod
def _get_builder(cls):
return _StreamingConnectorDatasetConfigFactory


@dataclasses.dataclass
class KafkaDatasetConfig(StreamingConnectorDatasetConfig):
"""
Dataset config for Kafka Streaming Connector
Args:
topic (str): The kafka topic to consume
"""
topic: str

def __post_init__(self):
self.streaming_connector_type = enums.StreamingConnectorType.KAFKA


@dataclasses.dataclass
class _StreamingConnectorDatasetConfigFactory(_ApiClassFactory):
config_abstract_class = StreamingConnectorDatasetConfig
config_class_key = 'streaming_connector_type'
config_class_map = {
enums.StreamingConnectorType.KAFKA: KafkaDatasetConfig,
}
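A short sketch of how the new Kafka config might be constructed; the topic name is a placeholder, and __post_init__ fills in the connector type so callers never pass it themselves:

from abacusai.api_class.connectors import KafkaDatasetConfig
from abacusai.api_class.enums import StreamingConnectorType

config = KafkaDatasetConfig(topic="clickstream-events")  # placeholder topic name
# The factory key is set automatically by __post_init__.
assert config.streaming_connector_type == StreamingConnectorType.KAFKA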
11 changes: 11 additions & 0 deletions abacusai/api_class/dataset.py
@@ -4,6 +4,17 @@
from .enums import OcrMode


@dataclasses.dataclass
class DatasetConfig(ApiClass):
"""
An abstract class for dataset configs
Args:
is_documentset (bool): Whether the dataset is a document set
"""
is_documentset: bool = dataclasses.field(default=None)


@dataclasses.dataclass
class ParsingConfig(ApiClass):
"""
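This new base ties the connector-specific config families together; a one-line sketch of the relationship, using the KafkaDatasetConfig added above:

from abacusai.api_class.connectors import KafkaDatasetConfig
from abacusai.api_class.dataset import DatasetConfig

# Streaming and application connector configs now share this base, so
# is_documentset is defined in a single place.
assert issubclass(KafkaDatasetConfig, DatasetConfig)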
32 changes: 15 additions & 17 deletions abacusai/api_class/dataset_application_connector.py
@@ -1,28 +1,27 @@
import dataclasses

from . import enums
-from .abstract import ApiClass, _ApiClassFactory
+from .abstract import _ApiClassFactory
+from .dataset import DatasetConfig


@dataclasses.dataclass
-class DatasetConfig(ApiClass):
+class ApplicationConnectorDatasetConfig(DatasetConfig):
"""
An abstract class for dataset configs specific to application connectors.
Args:
application_connector_type(enums.ApplicationConnectorType): The type of application connector
-is_documentset (bool): Whether the dataset is a document set
"""
application_connector_type: enums.ApplicationConnectorType = dataclasses.field(default=None, repr=False, init=False)
-is_documentset: bool = dataclasses.field(default=None)

@classmethod
def _get_builder(cls):
-return _DatasetConfigFactory
+return _ApplicationConnectorDatasetConfigFactory


@dataclasses.dataclass
-class ConfluenceDatasetConfig(DatasetConfig):
+class ConfluenceDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Confluence Application Connector
Args:
@@ -42,7 +41,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class GoogleAnalyticsDatasetConfig(DatasetConfig):
+class GoogleAnalyticsDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Google Analytics Application Connector
@@ -60,7 +59,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class GoogleDriveDatasetConfig(DatasetConfig):
+class GoogleDriveDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Google Drive Application Connector
@@ -80,7 +79,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class JiraDatasetConfig(DatasetConfig):
+class JiraDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Jira Application Connector
@@ -100,7 +99,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class OneDriveDatasetConfig(DatasetConfig):
+class OneDriveDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for OneDrive Application Connector
@@ -120,13 +119,12 @@ def __post_init__(self):


@dataclasses.dataclass
-class SharepointDatasetConfig(DatasetConfig):
+class SharepointDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Sharepoint Application Connector
Args:
location (str): The regex location of the files to fetch
-is_documentset (bool): Whether the dataset is a document set
csv_delimiter (str): If the file format is CSV, use a specific csv delimiter
extract_bounding_boxes (bool): Signifies whether to extract bounding boxes out of the documents. Only valid if is_documentset if True
merge_file_schemas (bool): Signifies if the merge file schema policy is enabled. Not applicable if is_documentset is True
@@ -141,7 +139,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class ZendeskDatasetConfig(DatasetConfig):
+class ZendeskDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Zendesk Application Connector
@@ -155,7 +153,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class AbacusUsageMetricsDatasetConfig(DatasetConfig):
+class AbacusUsageMetricsDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Abacus Usage Metrics Application Connector
@@ -172,7 +170,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class FreshserviceDatasetConfig(DatasetConfig):
+class FreshserviceDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Freshservice Application Connector
"""
@@ -182,8 +180,8 @@ def __post_init__(self):


@dataclasses.dataclass
-class _DatasetConfigFactory(_ApiClassFactory):
-config_abstract_class = DatasetConfig
+class _ApplicationConnectorDatasetConfigFactory(_ApiClassFactory):
+config_abstract_class = ApplicationConnectorDatasetConfig
config_class_key = 'application_connector_type'
config_class_map = {
enums.ApplicationConnectorType.CONFLUENCE: ConfluenceDatasetConfig,
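A hedged end-to-end sketch of the renamed configs in use: the location pattern, connector ID and API key are placeholders, and the create_dataset_from_application_connector call is assumed to accept a dataset_config keyword as in the current client:

from abacusai import ApiClient
from abacusai.api_class.dataset_application_connector import SharepointDatasetConfig

client = ApiClient("YOUR_API_KEY")  # placeholder API key

dataset_config = SharepointDatasetConfig(
    location="Shared Documents/.*",  # placeholder regex location
    is_documentset=True,             # inherited from the shared DatasetConfig base
    extract_bounding_boxes=True,
)

dataset = client.create_dataset_from_application_connector(
    table_name="sharepoint_documents",
    application_connector_id="0123456789",  # placeholder connector ID
    dataset_config=dataset_config,
)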
8 changes: 2 additions & 6 deletions abacusai/api_class/document_retriever.py
@@ -7,7 +7,7 @@
@dataclasses.dataclass
class VectorStoreConfig(ApiClass):
"""
-Configs for vector store indexing.
+Config for indexing options of a document retriever. Default values of optional arguments are heuristically selected by the Abacus.AI platform based on the underlying data.
Args:
chunk_size (int): The size of text chunks in the vector store.
@@ -25,8 +25,4 @@ class VectorStoreConfig(ApiClass):
prune_vectors: bool = dataclasses.field(default=None)


-@dataclasses.dataclass
-class DocumentRetrieverConfig(VectorStoreConfig):
-"""
-Configs for document retriever. If any configuration value is not explicitly provided, Abacus.AI will automatically infer default values based on the data.
-"""
+DocumentRetrieverConfig = VectorStoreConfig
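Since DocumentRetrieverConfig is now a plain alias rather than a subclass, both names build the identical config; a small sketch with illustrative values:

from abacusai.api_class.document_retriever import DocumentRetrieverConfig, VectorStoreConfig

assert DocumentRetrieverConfig is VectorStoreConfig  # alias, no longer a subclass

config = VectorStoreConfig(chunk_size=512, prune_vectors=True)  # illustrative values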
15 changes: 6 additions & 9 deletions abacusai/api_class/enums.py
@@ -362,7 +362,11 @@ class ApplicationConnectorType(ApiEnum):
ABACUSUSAGEMETRICS = 'ABACUSUSAGEMETRICS'
MICROSOFTAUTH = 'MICROSOFTAUTH'
FRESHSERVICE = 'FRESHSERVICE'
-ZENDESKSUNSHINEMESSAGING = 'zendesksunshinemessaging'
+ZENDESKSUNSHINEMESSAGING = 'ZENDESKSUNSHINEMESSAGING'


+class StreamingConnectorType(ApiEnum):
+KAFKA = 'KAFKA'


class PythonFunctionArgumentType(ApiEnum):
@@ -428,7 +432,7 @@ class LLMName(ApiEnum):
GEMINI_1_5_PRO = 'GEMINI_1_5_PRO'
MIXTRAL_CHAT = 'MIXTRAL_CHAT'
MISTRAL_MEDIUM = 'MISTRAL_MEDIUM'
-ABACUS_SMAUG2 = 'ABACUS_SMAUG2'
+ABACUS_SMAUG3 = 'ABACUS_SMAUG3'


class MonitorAlertType(ApiEnum):
@@ -532,13 +536,6 @@ class DataType(ApiEnum):
STRUCT = 'struct'
NULL = 'null'

-@classmethod
-def from_str(cls, value):
-if not value:
-return None
-default_map = {val.value: val for val in DataType}
-return default_map[value.lower()]


class AgentInterface(ApiEnum):
# Duplicated in reainternal.enums, both should be kept in sync
68 changes: 34 additions & 34 deletions abacusai/api_class/model.py
@@ -438,18 +438,19 @@ class ChatLLMTrainingConfig(TrainingConfig):
Training config for the CHAT_LLM problem type
Args:
-document_retrievers (List[str]): List of document retriever names to use for the feature stores this model was trained with.
-num_completion_tokens (int): Default for maximum number of tokens for chat answers. Reducing this will get faster responses which are more succinct
-temperature (float): The generative LLM temperature
+document_retrievers (List[str]): List of names of document retrievers to use as vector stores of information for RAG responses.
+num_completion_tokens (int): Default for maximum number of tokens for chat answers. Reducing this will get faster responses which are more succinct.
+temperature (float): The generative LLM temperature.
retrieval_columns (list): Include the metadata column values in the retrieved search results.
filter_columns (list): Allow users to filter the document retrievers on these metadata columns.
-include_general_knowledge (bool): Allow the LLM to rely not just on search results, but to fall back on general knowledge.
+include_general_knowledge (bool): Allow the LLM to rely not just on RAG search results, but to fall back on general knowledge. Disabled by default.
+enable_web_search (bool) : Allow the LLM to use Web Search Engines to retrieve information for better results.
behavior_instructions (str): Customize the overall role instructions for the LLM.
-response_instructions (str): Customize instructions for what the LLM responses should look like.
-enable_llm_rewrite (bool): Enable LLM rewrite for the ChatLLM. If None, LLM rewrite will happen automatically. Defaults to False.
+response_instructions (str): Customized instructions for how the LLM should respond.
+enable_llm_rewrite (bool): If enabled, an LLM will rewrite the RAG queries sent to document retriever. Disabled by default.
column_filtering_instructions (str): Instructions for a LLM call to automatically generate filter expressions on document metadata to retrieve relevant documents for the conversation.
keyword_requirement_instructions (str): Instructions for a LLM call to automatically generate keyword requirements to retrieve relevant documents for the conversation.
-query_rewrite_instructions (str): Instructions for a LLM call to rewrite a search query.
+query_rewrite_instructions (str): Special instructions for the LLM which rewrites the RAG query.
max_search_results (int): Maximum number of search results in the retrieval augmentation step. If we know that the questions are likely to have snippets which are easily matched in the documents, then a lower number will help with accuracy.
data_feature_group_ids: (List[str]): List of feature group IDs to use to possibly query for the ChatLLM. The created ChatLLM is commonly referred to as DataLLM.
data_prompt_context (str): Prompt context for the data feature group IDs.
@@ -458,33 +459,32 @@
search_score_cutoff (float): Minimum search score to consider a document as a valid search result.
database_connector_id (str): Database connector ID to use for the ChatLLM.
database_connector_tables (List[str]): List of tables to use from the database connector for the ChatLLM.
-enable_code_execution (bool): Enable code execution in the ChatLLM.
-metadata_columns (list): DEPRECATED. Include the metadata column values in the retrieved search results.
-lookup_rewrite_instructions (str): DEPRECATED. Instructions for a LLM call to rewrite a search query.
-"""
-document_retrievers: List[str] = None
-num_completion_tokens: int = None
-temperature: float = None
-retrieval_columns: list = None
-filter_columns: list = None
-include_general_knowledge: bool = None
-behavior_instructions: str = None
-response_instructions: str = None
-enable_llm_rewrite: bool = False
-column_filtering_instructions: str = None
-keyword_requirement_instructions: str = None
-query_rewrite_instructions: str = None
-max_search_results: int = None
-data_feature_group_ids: List[str] = None
-data_prompt_context: str = None
-hide_generated_sql: bool = None
-disable_data_summarization: bool = None
-search_score_cutoff: float = None
-database_connector_id: str = None
-database_connector_tables: List[str] = None
-enable_code_execution: bool = None
-metadata_columns: list = None
-lookup_rewrite_instructions: str = None
+enable_code_execution (bool): Enable python code execution in the ChatLLM. This equips the LLM with a python kernel in which all its code is executed.
+"""
+document_retrievers: List[str] = dataclasses.field(default=None)
+num_completion_tokens: int = dataclasses.field(default=None)
+temperature: float = dataclasses.field(default=None)
+retrieval_columns: list = dataclasses.field(default=None)
+filter_columns: list = dataclasses.field(default=None)
+include_general_knowledge: bool = dataclasses.field(default=None)
+enable_web_search: bool = dataclasses.field(default=None)
+behavior_instructions: str = dataclasses.field(default=None)
+response_instructions: str = dataclasses.field(default=None)
+enable_llm_rewrite: bool = dataclasses.field(default=None)
+column_filtering_instructions: str = dataclasses.field(default=None)
+keyword_requirement_instructions: str = dataclasses.field(default=None)
+query_rewrite_instructions: str = dataclasses.field(default=None)
+max_search_results: int = dataclasses.field(default=None)
+data_feature_group_ids: List[str] = dataclasses.field(default=None)
+data_prompt_context: str = dataclasses.field(default=None)
+hide_generated_sql: bool = dataclasses.field(default=None)
+disable_data_summarization: bool = dataclasses.field(default=None)
+search_score_cutoff: float = dataclasses.field(default=None)
+database_connector_id: str = dataclasses.field(default=None)
+database_connector_tables: List[str] = dataclasses.field(default=None)
+enable_code_execution: bool = dataclasses.field(default=None)
+metadata_columns: list = dataclasses.field(default=None, metadata={'deprecated': True})
+lookup_rewrite_instructions: str = dataclasses.field(default=None, metadata={'deprecated': True})

def __post_init__(self):
self.problem_type = enums.ProblemType.CHAT_LLM
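A sketch of a ChatLLM training config exercising the reworded and newly added options; the retriever name and instruction strings are placeholders:

from abacusai.api_class.model import ChatLLMTrainingConfig

training_config = ChatLLMTrainingConfig(
    document_retrievers=["support_articles_retriever"],  # placeholder retriever name
    num_completion_tokens=1024,
    temperature=0.0,
    enable_web_search=True,   # newly added option
    enable_llm_rewrite=True,  # default changed from False to None in this release
    behavior_instructions="You are a concise support assistant.",
    response_instructions="Answer in short bullet points and cite the source document.",
)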
2 changes: 1 addition & 1 deletion abacusai/batch_prediction.py
@@ -84,7 +84,7 @@ def __init__(self, client, batchPredictionId=None, createdAt=None, name=None, de
BatchPredictionArgs, globalPredictionArgs)
self.batch_prediction_args = client._build_class(getattr(
api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
-self.deprecated_keys = {'explanations', 'global_prediction_args'}
+self.deprecated_keys = {'global_prediction_args', 'explanations'}

def __repr__(self):
repr_dict = {f'batch_prediction_id': repr(self.batch_prediction_id), f'created_at': repr(self.created_at), f'name': repr(self.name), f'deployment_id': repr(self.deployment_id), f'file_connector_output_location': repr(self.file_connector_output_location), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'output_feature_group_id': repr(self.output_feature_group_id), f'feature_group_table_name': repr(self.feature_group_table_name), f'output_feature_group_table_name': repr(self.output_feature_group_table_name), f'summary_feature_group_table_name': repr(self.summary_feature_group_table_name), f'csv_input_prefix': repr(
2 changes: 1 addition & 1 deletion abacusai/batch_prediction_version.py
@@ -100,7 +100,7 @@ def __init__(self, client, batchPredictionVersion=None, batchPredictionId=None,
BatchPredictionArgs, globalPredictionArgs)
self.batch_prediction_args = client._build_class(getattr(
api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
-self.deprecated_keys = {'explanations', 'global_prediction_args'}
+self.deprecated_keys = {'global_prediction_args', 'explanations'}

def __repr__(self):
repr_dict = {f'batch_prediction_version': repr(self.batch_prediction_version), f'batch_prediction_id': repr(self.batch_prediction_id), f'status': repr(self.status), f'drift_monitor_status': repr(self.drift_monitor_status), f'deployment_id': repr(self.deployment_id), f'model_id': repr(self.model_id), f'model_version': repr(self.model_version), f'predictions_started_at': repr(self.predictions_started_at), f'predictions_completed_at': repr(self.predictions_completed_at), f'database_output_error': repr(self.database_output_error), f'total_predictions': repr(self.total_predictions), f'failed_predictions': repr(self.failed_predictions), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_connector_output_location': repr(self.file_connector_output_location), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'error': repr(self.error), f'drift_monitor_error': repr(self.drift_monitor_error), f'monitor_warnings': repr(self.monitor_warnings), f'csv_input_prefix': repr(