Skip to content

Commit

Permalink
Version 1.4.16
Browse files Browse the repository at this point in the history
  • Loading branch information
harshit-jain-git committed Oct 25, 2024
1 parent e6de9b7 commit b64d5f8
Show file tree
Hide file tree
Showing 277 changed files with 2,698 additions and 2,834 deletions.
2 changes: 1 addition & 1 deletion abacusai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,4 +218,4 @@
from .workflow_node_template import WorkflowNodeTemplate


__version__ = "1.4.15"
__version__ = "1.4.16"
16 changes: 12 additions & 4 deletions abacusai/api_class/ai_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,21 +598,25 @@ class WorkflowNodeTemplateInput(ApiClass):
Args:
name (str): A unique name of the input.
is_required (bool): Indicates whether the input is required. Defaults to False.
description (str): The description of this input.
"""
name: str
is_required: bool = dataclasses.field(default=False)
description: str = dataclasses.field(default='')

def to_dict(self):
return {
'name': self.name,
'is_required': self.is_required
'is_required': self.is_required,
'description': self.description
}

@classmethod
def from_dict(cls, mapping: dict):
return cls(
name=mapping['name'],
is_required=mapping.get('is_required', False)
is_required=mapping.get('is_required', False),
description=mapping.get('description', '')
)


Expand All @@ -624,19 +628,23 @@ class WorkflowNodeTemplateOutput(ApiClass):
Args:
name (str): The name of the output.
variable_type (WorkflowNodeOutputType): The type of the output.
description (str): The description of this output.
"""
name: str
variable_type: enums.WorkflowNodeOutputType = dataclasses.field(default=enums.WorkflowNodeOutputType.ANY)
description: str = dataclasses.field(default='')

def to_dict(self):
return {
'name': self.name,
'variable_type': self.variable_type.value
'variable_type': self.variable_type.value,
'description': self.description
}

@classmethod
def from_dict(cls, mapping: dict):
return cls(
name=mapping['name'],
variable_type=enums.WorkflowNodeOutputType(mapping.get('variable_type', 'ANY'))
variable_type=enums.WorkflowNodeOutputType(mapping.get('variable_type', 'ANY')),
description=mapping.get('description', '')
)
21 changes: 12 additions & 9 deletions abacusai/api_class/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,18 @@ def __post_init__(self):
self.extract_bounding_boxes = self.highlight_relevant_text # Highlight_relevant text acts as a wrapper over extract_bounding_boxes

def _detect_ocr_mode(self):
if self.document_type is None: # Maps to Auto mode
return self.ocr_mode # TODO: Link with paddle OCR
if self.document_type == DocumentType.TEXT:
return OcrMode.DEFAULT
elif self.document_type == DocumentType.TABLES_AND_FORMS:
return OcrMode.LAYOUT
elif self.document_type == DocumentType.EMBEDDED_IMAGES:
return OcrMode.SCANNED
return OcrMode.DEFAULT
if self.document_type is not None:
if self.document_type == DocumentType.TEXT:
return OcrMode.DEFAULT
elif self.document_type == DocumentType.TABLES_AND_FORMS:
return OcrMode.LAYOUT
elif self.document_type == DocumentType.EMBEDDED_IMAGES:
return OcrMode.SCANNED
elif self.document_type == DocumentType.SCANNED_TEXT:
return OcrMode.DEFAULT
if self.ocr_mode is not None:
return self.ocr_mode
return OcrMode.AUTO


@dataclasses.dataclass
Expand Down
4 changes: 4 additions & 0 deletions abacusai/api_class/document_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class VectorStoreConfig(ApiClass):
score_multiplier_column (str): If provided, will use the values in this metadata column to modify the relevance score of returned chunks for all queries.
prune_vectors (bool): Transform vectors using SVD so that the average component of vectors in the corpus are removed.
index_metadata_columns (bool): If True, metadata columns of the FG will also be used for indexing and querying.
use_document_summary (bool): If True, uses the summary of the document in addition to chunks of the document for indexing and querying.
summary_instructions (str): Instructions for the LLM to generate the document summary.
"""
chunk_size: int = dataclasses.field(default=None)
chunk_overlap_fraction: float = dataclasses.field(default=None)
Expand All @@ -25,6 +27,8 @@ class VectorStoreConfig(ApiClass):
score_multiplier_column: str = dataclasses.field(default=None)
prune_vectors: bool = dataclasses.field(default=None)
index_metadata_columns: bool = dataclasses.field(default=None)
use_document_summary: bool = dataclasses.field(default=None)
summary_instructions: str = dataclasses.field(default=None)


DocumentRetrieverConfig = VectorStoreConfig
1 change: 0 additions & 1 deletion abacusai/api_class/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,6 @@ class ApplicationConnectorType(ApiEnum):
GOOGLESHEETSUSER = 'GOOGLESHEETSUSER'
GOOGLEDOCSUSER = 'GOOGLEDOCSUSER'
ONEDRIVEUSER = 'ONEDRIVEUSER'
JIRAAUTH = 'JIRAAUTH'
TEAMSSCRAPER = 'TEAMSSCRAPER'
GITHUBUSER = 'GITHUBUSER'
OKTASAML = 'OKTASAML'
Expand Down
6 changes: 6 additions & 0 deletions abacusai/api_class/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,9 @@ class ChatLLMTrainingConfig(TrainingConfig):
unknown_answer_phrase (str): Fallback response when the LLM can't find an answer.
enable_tool_bar (bool): Enable the tool bar in Enterprise ChatLLM to provide additional functionalities like tool_use, web_search, image_gen, etc.
enable_inline_source_citations (bool): Enable inline citations of the sources in the response.
response_format (str): When set to 'JSON', the LLM will generate a JSON formatted string.
json_response_instructions (str): Instructions to be followed while generating the json_response if `response_format` is set to "JSON". This can include the schema information if the schema is dynamic and its keys cannot be pre-determined.
json_response_schema (str): Specifies the JSON schema that the model should adhere to if `response_format` is set to "JSON". This should be a json-formatted string where each field of the expected schema is mapped to a dictionary containing the fields 'type', 'required' and 'description'. For example - '{"sample_field": {"type": "integer", "required": true, "description": "Sample Field"}}'
"""
document_retrievers: List[str] = dataclasses.field(default=None)
num_completion_tokens: int = dataclasses.field(default=None)
Expand Down Expand Up @@ -516,6 +519,9 @@ class ChatLLMTrainingConfig(TrainingConfig):
unknown_answer_phrase: str = dataclasses.field(default=None)
enable_tool_bar: bool = dataclasses.field(default=None)
enable_inline_source_citations: bool = dataclasses.field(default=None)
response_format: str = dataclasses.field(default=None)
json_response_instructions: str = dataclasses.field(default=None)
json_response_schema: str = dataclasses.field(default=None)

def __post_init__(self):
self.problem_type = enums.ProblemType.CHAT_LLM
Expand Down
3 changes: 2 additions & 1 deletion abacusai/api_client_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ class DocstoreUtils:
PAGE_LLM_OCR = 'page_llm_ocr'
PAGE_TABLE_TEXT = 'page_table_text'
MARKDOWN_FEATURES = 'markdown_features'
MULTI_MODE_OCR_TEXT = 'multi_mode_ocr_text'
DOCUMENT_PROCESSING_CONFIG = 'document_processing_config'
DOCUMENT_PROCESSING_VERSION = 'document_processing_version'

Expand Down Expand Up @@ -591,7 +592,7 @@ def combine_doc_info(group):
document_data[cls.EMBEDDED_TEXT] = combine_page_texts(info.get(
cls.EMBEDDED_TEXT) or info.get(cls.PAGE_TEXT) or '' for info in page_infos)
page_texts = None
for k in [cls.PAGE_MARKDOWN, cls.PAGE_LLM_OCR, cls.PAGE_TABLE_TEXT]:
for k in [cls.MULTI_MODE_OCR_TEXT, cls.PAGE_MARKDOWN, cls.PAGE_LLM_OCR, cls.PAGE_TABLE_TEXT]:
if page_infos[0].get(k) and not document_data.get(cls.PAGE_MARKDOWN):
document_data[cls.PAGE_MARKDOWN] = page_texts = [
page.get(k, '') for page in page_infos]
Expand Down
2 changes: 1 addition & 1 deletion abacusai/batch_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(self, client, batchPredictionId=None, createdAt=None, name=None, de
BatchPredictionArgs, globalPredictionArgs)
self.batch_prediction_args = client._build_class(getattr(
api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
self.deprecated_keys = {'global_prediction_args', 'explanations'}
self.deprecated_keys = {'explanations', 'global_prediction_args'}

def __repr__(self):
repr_dict = {f'batch_prediction_id': repr(self.batch_prediction_id), f'created_at': repr(self.created_at), f'name': repr(self.name), f'deployment_id': repr(self.deployment_id), f'file_connector_output_location': repr(self.file_connector_output_location), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'output_feature_group_id': repr(self.output_feature_group_id), f'feature_group_table_name': repr(self.feature_group_table_name), f'output_feature_group_table_name': repr(self.output_feature_group_table_name), f'summary_feature_group_table_name': repr(self.summary_feature_group_table_name), f'csv_input_prefix': repr(
Expand Down
2 changes: 1 addition & 1 deletion abacusai/batch_prediction_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def __init__(self, client, batchPredictionVersion=None, batchPredictionId=None,
BatchPredictionArgs, globalPredictionArgs)
self.batch_prediction_args = client._build_class(getattr(
api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
self.deprecated_keys = {'global_prediction_args', 'explanations'}
self.deprecated_keys = {'explanations', 'global_prediction_args'}

def __repr__(self):
repr_dict = {f'batch_prediction_version': repr(self.batch_prediction_version), f'batch_prediction_id': repr(self.batch_prediction_id), f'status': repr(self.status), f'drift_monitor_status': repr(self.drift_monitor_status), f'deployment_id': repr(self.deployment_id), f'model_id': repr(self.model_id), f'model_version': repr(self.model_version), f'predictions_started_at': repr(self.predictions_started_at), f'predictions_completed_at': repr(self.predictions_completed_at), f'database_output_error': repr(self.database_output_error), f'total_predictions': repr(self.total_predictions), f'failed_predictions': repr(self.failed_predictions), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_connector_output_location': repr(self.file_connector_output_location), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'error': repr(self.error), f'drift_monitor_error': repr(self.drift_monitor_error), f'monitor_warnings': repr(self.monitor_warnings), f'csv_input_prefix': repr(
Expand Down
8 changes: 5 additions & 3 deletions abacusai/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ class BaseApiClient:
client_options (ClientOptions): Optional API client configurations
skip_version_check (bool): If true, will skip checking the server's current API version on initializing the client
"""
client_version = '1.4.15'
client_version = '1.4.16'

def __init__(self, api_key: str = None, server: str = None, client_options: ClientOptions = None, skip_version_check: bool = False, include_tb: bool = False):
self.api_key = api_key
Expand Down Expand Up @@ -1498,16 +1498,18 @@ def list_application_connector_objects(self, application_connector_id: str) -> l
application_connector_id (str): Unique string identifier for the application connector."""
return self._call_api('listApplicationConnectorObjects', 'GET', query_params={'applicationConnectorId': application_connector_id})

def get_connector_auth(self, service: Union[ApplicationConnectorType, str], scopes: List = None) -> ApplicationConnector:
def get_connector_auth(self, service: Union[ApplicationConnectorType, str] = None, scopes: List = None, name: str = None, is_user_level: bool = True) -> ApplicationConnector:
"""Get the authentication details for a given connector.

Args:
service (ApplicationConnectorType): The service name.
scopes (List): The scopes to request for the connector.
name (str): Name of the connector.
is_user_level (bool): Type of connector to be fetched (user level or org level).

Returns:
ApplicationConnector: The application connector with the authentication details."""
return self._call_api('getConnectorAuth', 'GET', query_params={'service': service, 'scopes': scopes}, parse_type=ApplicationConnector)
return self._call_api('getConnectorAuth', 'GET', query_params={'service': service, 'scopes': scopes, 'name': name, 'isUserLevel': is_user_level}, parse_type=ApplicationConnector)

def list_streaming_connectors(self) -> List[StreamingConnector]:
"""Retrieves a list of all streaming connectors along with their corresponding attributes.
Expand Down
14 changes: 9 additions & 5 deletions abacusai/document_retriever_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,25 @@ class DocumentRetrieverConfig(AbstractApiClass):
scoreMultiplierColumn (str): The values in this metadata column are used to modify the relevance scores of returned chunks.
pruneVectors (bool): Corpus specific transformation of vectors that applies dimensional reduction techniques to strip common components from the vectors.
indexMetadataColumns (bool): If True, metadata columns of the FG will also be used for indexing and querying.
useDocumentSummary (bool): If True, uses the summary of the document in addition to chunks of the document for indexing and querying.
summaryInstructions (str): Instructions for the LLM to generate the document summary.
"""

def __init__(self, client, chunkSize=None, chunkOverlapFraction=None, textEncoder=None, scoreMultiplierColumn=None, pruneVectors=None, indexMetadataColumns=None):
def __init__(self, client, chunkSize=None, chunkOverlapFraction=None, textEncoder=None, scoreMultiplierColumn=None, pruneVectors=None, indexMetadataColumns=None, useDocumentSummary=None, summaryInstructions=None):
super().__init__(client, None)
self.chunk_size = chunkSize
self.chunk_overlap_fraction = chunkOverlapFraction
self.text_encoder = textEncoder
self.score_multiplier_column = scoreMultiplierColumn
self.prune_vectors = pruneVectors
self.index_metadata_columns = indexMetadataColumns
self.use_document_summary = useDocumentSummary
self.summary_instructions = summaryInstructions
self.deprecated_keys = {}

def __repr__(self):
repr_dict = {f'chunk_size': repr(self.chunk_size), f'chunk_overlap_fraction': repr(self.chunk_overlap_fraction), f'text_encoder': repr(self.text_encoder), f'score_multiplier_column': repr(
self.score_multiplier_column), f'prune_vectors': repr(self.prune_vectors), f'index_metadata_columns': repr(self.index_metadata_columns)}
repr_dict = {f'chunk_size': repr(self.chunk_size), f'chunk_overlap_fraction': repr(self.chunk_overlap_fraction), f'text_encoder': repr(self.text_encoder), f'score_multiplier_column': repr(self.score_multiplier_column), f'prune_vectors': repr(
self.prune_vectors), f'index_metadata_columns': repr(self.index_metadata_columns), f'use_document_summary': repr(self.use_document_summary), f'summary_instructions': repr(self.summary_instructions)}
class_name = "DocumentRetrieverConfig"
repr_str = ',\n '.join([f'{key}={value}' for key, value in repr_dict.items(
) if getattr(self, key, None) is not None and key not in self.deprecated_keys])
Expand All @@ -41,6 +45,6 @@ def to_dict(self):
Returns:
dict: The dict value representation of the class parameters
"""
resp = {'chunk_size': self.chunk_size, 'chunk_overlap_fraction': self.chunk_overlap_fraction, 'text_encoder': self.text_encoder,
'score_multiplier_column': self.score_multiplier_column, 'prune_vectors': self.prune_vectors, 'index_metadata_columns': self.index_metadata_columns}
resp = {'chunk_size': self.chunk_size, 'chunk_overlap_fraction': self.chunk_overlap_fraction, 'text_encoder': self.text_encoder, 'score_multiplier_column': self.score_multiplier_column,
'prune_vectors': self.prune_vectors, 'index_metadata_columns': self.index_metadata_columns, 'use_document_summary': self.use_document_summary, 'summary_instructions': self.summary_instructions}
return {key: value for key, value in resp.items() if value is not None and key not in self.deprecated_keys}
4 changes: 2 additions & 2 deletions docs/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 12535596649b36dd1c7e0cddf2d1e3b2
# This file records the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 787e52a6c82af449544cb6a1253baab3
tags: 645f666f9bcd5a90fca523b33c5a78b7
2 changes: 1 addition & 1 deletion docs/.buildinfo.bak
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 29cf0b9064b2c084484177223531079b
config: 12535596649b36dd1c7e0cddf2d1e3b2
tags: 645f666f9bcd5a90fca523b33c5a78b7
12 changes: 12 additions & 0 deletions docs/_sources/autoapi/abacusai/api_class/ai_agents/index.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,8 @@ Module Contents
:type name: str
:param is_required: Indicates whether the input is required. Defaults to False.
:type is_required: bool
:param description: The description of this input.
:type description: str


.. py:attribute:: name
Expand All @@ -511,6 +513,10 @@ Module Contents
:type: bool


.. py:attribute:: description
:type: str


.. py:method:: to_dict()
Standardizes converting an ApiClass to dictionary.
Expand All @@ -535,6 +541,8 @@ Module Contents
:type name: str
:param variable_type: The type of the output.
:type variable_type: WorkflowNodeOutputType
:param description: The description of this output.
:type description: str


.. py:attribute:: name
Expand All @@ -545,6 +553,10 @@ Module Contents
:type: abacusai.api_class.enums.WorkflowNodeOutputType


.. py:attribute:: description
:type: str


.. py:method:: to_dict()
Standardizes converting an ApiClass to dictionary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ Module Contents
:type prune_vectors: bool
:param index_metadata_columns: If True, metadata columns of the FG will also be used for indexing and querying.
:type index_metadata_columns: bool
:param use_document_summary: If True, uses the summary of the document in addition to chunks of the document for indexing and querying.
:type use_document_summary: bool
:param summary_instructions: Instructions for the LLM to generate the document summary.
:type summary_instructions: str


.. py:attribute:: chunk_size
Expand Down Expand Up @@ -74,5 +78,13 @@ Module Contents
:type: bool


.. py:attribute:: use_document_summary
:type: bool


.. py:attribute:: summary_instructions
:type: str


.. py:data:: DocumentRetrieverConfig
5 changes: 0 additions & 5 deletions docs/_sources/autoapi/abacusai/api_class/enums/index.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1773,11 +1773,6 @@ Module Contents



.. py:attribute:: JIRAAUTH
:value: 'JIRAAUTH'



.. py:attribute:: TEAMSSCRAPER
:value: 'TEAMSSCRAPER'

Expand Down
Loading

0 comments on commit b64d5f8

Please sign in to comment.