Version 1.4.23
surajabacusai committed Dec 13, 2024
1 parent 39bd61e commit 1de29c3
Showing 273 changed files with 2,385 additions and 242 deletions.
4 changes: 3 additions & 1 deletion abacusai/__init__.py
@@ -30,6 +30,7 @@
from .chatllm_referral_invite import ChatllmReferralInvite
from .client import AgentResponse, ApiClient, ApiException, ClientOptions, ReadOnlyClient, _request_context
from .code_autocomplete_response import CodeAutocompleteResponse
from .code_bot import CodeBot
from .code_edit_response import CodeEditResponse
from .code_source import CodeSource
from .compute_point_info import ComputePointInfo
@@ -220,10 +221,11 @@
from .user_exception import UserException
from .video_gen_settings import VideoGenSettings
from .video_search_result import VideoSearchResult
from .web_page_response import WebPageResponse
from .web_search_response import WebSearchResponse
from .web_search_result import WebSearchResult
from .webhook import Webhook
from .workflow_node_template import WorkflowNodeTemplate


__version__ = "1.4.22"
__version__ = "1.4.23"
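
Both newly exported classes are importable from the package root once this release is installed; a quick sanity check against the version bump above:

import abacusai
from abacusai import CodeBot, WebPageResponse  # both newly exported in this commit

assert abacusai.__version__ == "1.4.23"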
14 changes: 11 additions & 3 deletions abacusai/api_class/ai_agents.py
@@ -61,12 +61,14 @@ class WorkflowNodeInputMapping(ApiClass):
If the type is `USER_INPUT`, the value given by the source node will be used as the default initial value before the user edits it.
Set to `None` if the type is `USER_INPUT` and the variable doesn't need a pre-filled initial value.
is_required (bool): Indicates whether the input is required. Defaults to True.
description (str): The description of this input.
"""
name: str
variable_type: enums.WorkflowNodeInputType
variable_source: str = dataclasses.field(default=None)
source_prop: str = dataclasses.field(default=None)
is_required: bool = dataclasses.field(default=True)
description: str = dataclasses.field(default=None)

def __post_init__(self):
if self.variable_type == enums.WorkflowNodeInputType.IGNORE and self.is_required:
@@ -81,6 +83,7 @@ def to_dict(self):
'variable_source': self.variable_source,
'source_prop': self.source_prop or self.name,
'is_required': self.is_required,
'description': self.description
}

@classmethod
@@ -94,6 +97,7 @@ def from_dict(cls, mapping: dict):
variable_source=mapping.get('variable_source'),
source_prop=mapping.get('source_prop') or mapping['name'] if mapping.get('variable_source') else None,
is_required=mapping.get('is_required', True),
description=mapping.get('description')
)


@@ -172,9 +176,11 @@ class WorkflowNodeOutputMapping(ApiClass):
Args:
name (str): The name of the output.
variable_type (Union[WorkflowNodeOutputType, str]): The type of the output in the form of an enum or a string.
description (str): The description of this output.
"""
name: str
variable_type: Union[enums.WorkflowNodeOutputType, str] = dataclasses.field(default=enums.WorkflowNodeOutputType.ANY)
description: str = dataclasses.field(default=None)

def __post_init__(self):
if isinstance(self.variable_type, str):
@@ -183,7 +189,8 @@ def __post_init__(self):
def to_dict(self):
return {
'name': self.name,
'variable_type': self.variable_type.value
'variable_type': self.variable_type.value,
'description': self.description
}

@classmethod
@@ -194,7 +201,8 @@ def from_dict(cls, mapping: dict):
raise ValueError('output_mapping', f'Invalid enum argument {variable_type}. Provided argument should be of enum type WorkflowNodeOutputType.')
return cls(
name=mapping['name'],
variable_type=enums.WorkflowNodeOutputType(variable_type)
variable_type=enums.WorkflowNodeOutputType(variable_type),
description=mapping.get('description')
)


@@ -392,7 +400,7 @@ def from_template(cls, template_name: str, name: str, configs: dict = None, inpu
if isinstance(input_mappings, List) and all(isinstance(input, WorkflowNodeInputMapping) for input in input_mappings):
instance_input_mappings = input_mappings
elif isinstance(input_mappings, Dict) and all(isinstance(key, str) and isinstance(value, WorkflowNodeInputMapping) for key, value in input_mappings.items()):
instance_input_mappings = [WorkflowNodeInputMapping(name=arg, variable_type=mapping.variable_type, variable_source=mapping.variable_source, source_prop=mapping.source_prop, is_required=mapping.is_required) for arg, mapping in input_mappings]
instance_input_mappings = [WorkflowNodeInputMapping(name=arg, variable_type=mapping.variable_type, variable_source=mapping.variable_source, source_prop=mapping.source_prop, is_required=mapping.is_required, description=mapping.description) for arg, mapping in input_mappings]
elif input_mappings is None:
instance_input_mappings = []
else:
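
Taken together, the ai_agents.py changes add an optional description to both mapping classes and carry it through to_dict, from_dict, and from_template. A minimal sketch of filling it in (the surrounding workflow-node definition is assumed and not part of this commit):

from abacusai.api_class.ai_agents import WorkflowNodeInputMapping, WorkflowNodeOutputMapping
from abacusai.api_class import enums

# Input supplied by the end user, now documented via the new description field.
query_input = WorkflowNodeInputMapping(
    name='query',
    variable_type=enums.WorkflowNodeInputType.USER_INPUT,
    is_required=True,
    description='Free-form question typed by the end user.',
)

# Output of the node, also carrying a human-readable description.
answer_output = WorkflowNodeOutputMapping(
    name='answer',
    variable_type=enums.WorkflowNodeOutputType.ANY,
    description='Answer string returned to the caller.',
)

# Both serializations now include the description key, per the updated to_dict() methods.
print(query_input.to_dict()['description'])
print(answer_output.to_dict()['description'])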
2 changes: 2 additions & 0 deletions abacusai/api_class/enums.py
@@ -473,13 +473,15 @@ class LLMName(ApiEnum):
LLAMA3_1_405B = 'LLAMA3_1_405B'
LLAMA3_1_70B = 'LLAMA3_1_70B'
LLAMA3_1_8B = 'LLAMA3_1_8B'
LLAMA3_3_70B = 'LLAMA3_3_70B'
LLAMA3_LARGE_CHAT = 'LLAMA3_LARGE_CHAT'
CLAUDE_V3_OPUS = 'CLAUDE_V3_OPUS'
CLAUDE_V3_SONNET = 'CLAUDE_V3_SONNET'
CLAUDE_V3_HAIKU = 'CLAUDE_V3_HAIKU'
CLAUDE_V3_5_SONNET = 'CLAUDE_V3_5_SONNET'
CLAUDE_V3_5_HAIKU = 'CLAUDE_V3_5_HAIKU'
GEMINI_1_5_PRO = 'GEMINI_1_5_PRO'
GEMINI_2_FLASH = 'GEMINI_2_FLASH'
ABACUS_SMAUG3 = 'ABACUS_SMAUG3'
ABACUS_DRACARYS = 'ABACUS_DRACARYS'
QWEN_2_5_32B = 'QWEN_2_5_32B'
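
The two new model names slot into any API that accepts an LLM name string. An illustrative call (evaluate_prompt is the SDK's existing generic completion helper; its availability and response shape are assumed here, not shown in this diff):

from abacusai import ApiClient
from abacusai.api_class.enums import LLMName

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

# Reference one of the newly added members instead of a hard-coded string.
response = client.evaluate_prompt(
    prompt='Summarize the 1.4.23 release in one sentence.',
    llm_name=LLMName.LLAMA3_3_70B.value,
)
print(response.content)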
33 changes: 31 additions & 2 deletions abacusai/api_client_utils.py
@@ -7,6 +7,8 @@
from itertools import groupby
from typing import IO, Callable, List

import pandas as pd


INVALID_PANDAS_COLUMN_NAME_CHARACTERS = '[^A-Za-z0-9_]'

@@ -44,6 +46,34 @@ def avro_to_pandas_dtype(avro_type):
return avro_pandas_dtypes.get(avro_type, 'object')


def _get_spark_incompatible_columns(df):
# Spark-compatible pandas dtypes
spark_compatible_pd_dtypes = {
'int8', 'int16', 'int32', 'int64',
'float32', 'float64',
'bool', # Standard boolean type
'boolean', # Nullable BooleanDtype
'object', # Assuming they contain strings
'string', # StringDtype introduced in pandas 1.0
'datetime64[ns]',
'timedelta64[ns]'
}

incompatible_columns = []

for col in df.columns:
dtype = df[col].dtype
dtype_str = str(dtype)
if pd.api.types.is_extension_array_dtype(dtype):
dtype_name = dtype.name.lower()
if dtype_name not in spark_compatible_pd_dtypes:
incompatible_columns.append((col, dtype_name))
elif dtype_str not in spark_compatible_pd_dtypes:
incompatible_columns.append((col, dtype_str))

return incompatible_columns, spark_compatible_pd_dtypes


def get_non_nullable_type(types):
non_nullable_types = [
avro_type for avro_type in types if avro_type != 'null']
@@ -480,8 +510,7 @@ def get_pandas_pages_df(cls, df, feature_group_version: str, doc_id_column: str,
json_pages_list = [{**(page or {}), doc_id_column: content_hash_to_doc_id[content_hash]}
for content_hash, page in pages_list]
pages_df_with_config = pd.DataFrame(json_pages_list)
pages_df_with_config = pages_df_with_config.replace(
{pd.np.nan: None})
pages_df_with_config = pages_df_with_config.replace({np.nan: None})

df = df.drop_duplicates([doc_id_column])
group_by_archive = df.groupby(
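
The new _get_spark_incompatible_columns helper above backs the Spark-compatibility check added to _validate_pandas_df in client.py (next file). A small sketch of what it reports, using a deliberately incompatible category column (the helper is private, so this is purely illustrative):

import pandas as pd

from abacusai.api_client_utils import _get_spark_incompatible_columns

df = pd.DataFrame({
    'user_id': pd.Series([1, 2, 3], dtype='int64'),           # supported
    'score': pd.Series([0.1, 0.2, 0.3], dtype='float32'),     # supported
    'segment': pd.Series(['a', 'b', 'a'], dtype='category'),  # not in the supported set
})

incompatible, supported_dtypes = _get_spark_incompatible_columns(df)
print(incompatible)  # [('segment', 'category')]

# Casting to a supported dtype clears the ValueError that _validate_pandas_df would raise.
df['segment'] = df['segment'].astype('string')
print(_get_spark_incompatible_columns(df)[0])  # []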
41 changes: 33 additions & 8 deletions abacusai/client.py
@@ -53,7 +53,8 @@
from .api_class.segments import ResponseSection, Segment
from .api_client_utils import (
INVALID_PANDAS_COLUMN_NAME_CHARACTERS, StreamingHandler, StreamType,
clean_column_name, get_object_from_context, run
_get_spark_incompatible_columns, clean_column_name,
get_object_from_context, run
)
from .api_endpoint import ApiEndpoint
from .api_key import ApiKey
Expand Down Expand Up @@ -177,6 +178,7 @@
from .use_case import UseCase
from .use_case_requirements import UseCaseRequirements
from .user import User
from .web_page_response import WebPageResponse
from .web_search_response import WebSearchResponse
from .webhook import Webhook
from .workflow_node_template import WorkflowNodeTemplate
@@ -633,7 +635,7 @@ class BaseApiClient:
client_options (ClientOptions): Optional API client configurations
skip_version_check (bool): If true, will skip checking the server's current API version on initializing the client
"""
client_version = '1.4.22'
client_version = '1.4.23'

def __init__(self, api_key: str = None, server: str = None, client_options: ClientOptions = None, skip_version_check: bool = False, include_tb: bool = False):
self.api_key = api_key
@@ -941,6 +943,17 @@ def _validate_pandas_df(self, df, clean_column_names: bool):
if bad_column_names:
raise ValueError(
f'The dataframe\'s Column(s): {bad_column_names} contain illegal characters. Please rename the columns such that they only contain alphanumeric characters and underscores, and must start with an alpha character.')

incompatible_columns, compatible_pd_dtypes = _get_spark_incompatible_columns(
df)
if incompatible_columns:
error_message = "The following columns have incompatible data types:\n"
for col_name, col_dtype in incompatible_columns:
error_message += f" - '{col_name}' (type: {col_dtype})\n"
error_message += f"Supported data types are: {', '.join(sorted(compatible_pd_dtypes))}\n"
error_message += "Please cast these columns to a supported data type and try again.\n"
raise ValueError(error_message)

return df

def _upload_from_pandas(self, upload, df, clean_column_names=False) -> Dataset:
@@ -4586,15 +4599,16 @@ def remove_user_from_organization(self, email: str):
email (str): The email address of the user to remove from the organization."""
return self._call_api('removeUserFromOrganization', 'DELETE', query_params={'email': email})

def send_email(self, email: str, subject: str, body: str, is_html: bool = False):
def send_email(self, email: str, subject: str, body: str, is_html: bool = False, attachments: None = None):
"""Send an email to the specified email address with provided subject and contents.

Args:
email (str): The email address to send the email to.
subject (str): The subject of the email.
body (str): The body of the email.
is_html (bool): Whether the body is html or not."""
return self._call_api('sendEmail', 'POST', query_params={}, body={'email': email, 'subject': subject, 'body': body, 'isHtml': is_html})
is_html (bool): Whether the body is html or not.
attachments (None): A dictionary where the key is the filename (including the file extension), and the value is either a file-like object (e.g., an open file in binary mode) or raw file data (e.g., bytes)."""
return self._call_api('sendEmail', 'POST', query_params={}, data={'email': json.dumps(email) if (email is not None and not isinstance(email, str)) else email, 'subject': json.dumps(subject) if (subject is not None and not isinstance(subject, str)) else subject, 'body': json.dumps(body) if (body is not None and not isinstance(body, str)) else body, 'isHtml': json.dumps(is_html) if (is_html is not None and not isinstance(is_html, str)) else is_html}, files=attachments)
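
An illustrative call using the new attachments parameter, following the filename-to-file-object format described in the docstring (recipient, subject, and file name are placeholders):

from abacusai import ApiClient

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

with open('release_notes.pdf', 'rb') as notes:
    client.send_email(
        email='user@example.com',
        subject='v1.4.23 release notes',
        body='<p>See the attached notes.</p>',
        is_html=True,
        attachments={'release_notes.pdf': notes},  # filename -> file-like object or raw bytes
    )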

def create_deployment_webhook(self, deployment_id: str, endpoint: str, webhook_event_type: str, payload_template: dict = None) -> Webhook:
"""Create a webhook attached to a given deployment ID.
@@ -6002,7 +6016,7 @@ def extract_document_data(self, document: io.TextIOBase = None, doc_id: str = No

Returns:
DocumentData: The extracted document data."""
return self._proxy_request('ExtractDocumentData', 'POST', query_params={}, data={'docId': doc_id, 'documentProcessingConfig': json.dumps(document_processing_config), 'startPage': start_page, 'endPage': end_page, 'returnExtractedPageText': return_extracted_page_text}, files={'document': document}, parse_type=DocumentData)
return self._proxy_request('ExtractDocumentData', 'POST', query_params={}, data={'docId': doc_id, 'documentProcessingConfig': json.dumps(document_processing_config.to_dict()) if hasattr(document_processing_config, 'to_dict') else json.dumps(document_processing_config), 'startPage': start_page, 'endPage': end_page, 'returnExtractedPageText': return_extracted_page_text}, files={'document': document}, parse_type=DocumentData)

def get_training_config_options(self, project_id: str, feature_group_ids: List = None, for_retrain: bool = False, current_training_config: Union[dict, TrainingConfig] = None) -> List[TrainingConfigOptions]:
"""Retrieves the full initial description of the model training configuration options available for the specified project. The configuration options available are determined by the use case associated with the specified project. Refer to the [Use Case Documentation]({USE_CASES_URL}) for more information on use cases and use case-specific configuration options.
@@ -7806,7 +7820,7 @@ def upsert_data(self, feature_group_id: str, data: dict, streaming_token: str =

Returns:
FeatureGroupRow: The feature group row that was upserted."""
return self._proxy_request('upsertData', 'POST', query_params={}, data={'featureGroupId': feature_group_id, 'data': json.dumps(data), 'streamingToken': streaming_token}, files=blobs, parse_type=FeatureGroupRow, is_sync=True)
return self._proxy_request('upsertData', 'POST', query_params={}, data={'featureGroupId': feature_group_id, 'data': json.dumps(data.to_dict()) if hasattr(data, 'to_dict') else json.dumps(data), 'streamingToken': streaming_token}, files=blobs, parse_type=FeatureGroupRow, is_sync=True)
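
upsert_data now serializes either a plain dict or any object exposing to_dict(); a short sketch with a plain dict (the feature group ID and column names are placeholders):

from abacusai import ApiClient

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

row = client.upsert_data(
    feature_group_id='FEATURE_GROUP_ID',          # placeholder
    data={'ticket_id': '123', 'status': 'open'},  # plain dicts keep working unchanged
)
print(row.to_dict())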

def delete_data(self, feature_group_id: str, primary_key: str):
"""Deletes a row from the feature group given the primary key
@@ -8708,6 +8722,17 @@ def search_web_for_llm(self, queries: List, search_providers: List = None, max_r
WebSearchResponse: Results of running the search queries."""
return self._proxy_request('SearchWebForLlm', 'POST', query_params={}, body={'queries': queries, 'searchProviders': search_providers, 'maxResults': max_results, 'safe': safe, 'fetchContent': fetch_content, 'maxPageTokens': max_page_tokens, 'convertToMarkdown': convert_to_markdown}, parse_type=WebSearchResponse)

def fetch_web_page(self, url: str, convert_to_markdown: bool = True) -> WebPageResponse:
"""Scrapes the content of a web page and returns it as a string.

Args:
url (str): The url of the web page to scrape.
convert_to_markdown (bool): Whether content should be converted to markdown.

Returns:
WebPageResponse: The content of the web page."""
return self._proxy_request('FetchWebPage', 'POST', query_params={}, body={'url': url, 'convertToMarkdown': convert_to_markdown}, parse_type=WebPageResponse)
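
A short usage sketch for the new fetch_web_page endpoint (the URL is a placeholder; no WebPageResponse fields beyond what the SDK returns are assumed):

from abacusai import ApiClient

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

page = client.fetch_web_page('https://example.com', convert_to_markdown=True)
print(page.to_dict())  # inspect the returned WebPageResponse fields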

def construct_agent_conversation_messages_for_llm(self, deployment_conversation_id: str = None, external_session_id: str = None, include_document_contents: bool = True) -> AgentConversation:
"""Returns conversation history in a format for LLM calls.

@@ -8840,4 +8865,4 @@ def get_relevant_snippets(self, doc_ids: List = None, blobs: io.TextIOBase = Non

Returns:
list[DocumentRetrieverLookupResult]: The snippets found from the documents."""
return self._proxy_request('GetRelevantSnippets', 'POST', query_params={}, data={'docIds': doc_ids, 'query': query, 'documentRetrieverConfig': json.dumps(document_retriever_config), 'honorSentenceBoundary': honor_sentence_boundary, 'numRetrievalMarginWords': num_retrieval_margin_words, 'maxWordsPerSnippet': max_words_per_snippet, 'maxSnippetsPerDocument': max_snippets_per_document, 'startWordIndex': start_word_index, 'endWordIndex': end_word_index, 'includingBoundingBoxes': including_bounding_boxes, 'text': text}, files=blobs, parse_type=DocumentRetrieverLookupResult)
return self._proxy_request('GetRelevantSnippets', 'POST', query_params={}, data={'docIds': doc_ids, 'query': query, 'documentRetrieverConfig': json.dumps(document_retriever_config.to_dict()) if hasattr(document_retriever_config, 'to_dict') else json.dumps(document_retriever_config), 'honorSentenceBoundary': honor_sentence_boundary, 'numRetrievalMarginWords': num_retrieval_margin_words, 'maxWordsPerSnippet': max_words_per_snippet, 'maxSnippetsPerDocument': max_snippets_per_document, 'startWordIndex': start_word_index, 'endWordIndex': end_word_index, 'includingBoundingBoxes': including_bounding_boxes, 'text': text}, files=blobs, parse_type=DocumentRetrieverLookupResult)
36 changes: 36 additions & 0 deletions abacusai/code_bot.py
@@ -0,0 +1,36 @@
from .return_class import AbstractApiClass


class CodeBot(AbstractApiClass):
"""
A bot option for CodeLLM
Args:
client (ApiClient): An authenticated API Client instance
llmName (str): The name of the LLM.
name (str): The name of the bot.
"""

def __init__(self, client, llmName=None, name=None):
super().__init__(client, None)
self.llm_name = llmName
self.name = name
self.deprecated_keys = {}

def __repr__(self):
repr_dict = {f'llm_name': repr(
self.llm_name), f'name': repr(self.name)}
class_name = "CodeBot"
repr_str = ',\n '.join([f'{key}={value}' for key, value in repr_dict.items(
) if getattr(self, key, None) is not None and key not in self.deprecated_keys])
return f"{class_name}({repr_str})"

def to_dict(self):
"""
Get a dict representation of the parameters in this class
Returns:
dict: The dict value representation of the class parameters
"""
resp = {'llm_name': self.llm_name, 'name': self.name}
return {key: value for key, value in resp.items() if value is not None and key not in self.deprecated_keys}