Version 1.4.23
surajabacusai committed Dec 13, 2024
1 parent 39bd61e commit 1de29c3
Showing 273 changed files with 2,385 additions and 242 deletions.
4 changes: 3 additions & 1 deletion abacusai/__init__.py
@@ -30,6 +30,7 @@
from .chatllm_referral_invite import ChatllmReferralInvite
from .client import AgentResponse, ApiClient, ApiException, ClientOptions, ReadOnlyClient, _request_context
from .code_autocomplete_response import CodeAutocompleteResponse
from .code_bot import CodeBot
from .code_edit_response import CodeEditResponse
from .code_source import CodeSource
from .compute_point_info import ComputePointInfo
@@ -220,10 +221,11 @@
from .user_exception import UserException
from .video_gen_settings import VideoGenSettings
from .video_search_result import VideoSearchResult
from .web_page_response import WebPageResponse
from .web_search_response import WebSearchResponse
from .web_search_result import WebSearchResult
from .webhook import Webhook
from .workflow_node_template import WorkflowNodeTemplate


__version__ = "1.4.22"
__version__ = "1.4.23"
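
Both newly exported classes are importable from the package root once this release is installed; a quick sanity check against the version bump above:

import abacusai
from abacusai import CodeBot, WebPageResponse  # both newly exported in this commit

assert abacusai.__version__ == "1.4.23"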
14 changes: 11 additions & 3 deletions abacusai/api_class/ai_agents.py
@@ -61,12 +61,14 @@ class WorkflowNodeInputMapping(ApiClass):
If the type is `USER_INPUT`, the value given by the source node will be used as the default initial value before the user edits it.
Set to `None` if the type is `USER_INPUT` and the variable doesn't need a pre-filled initial value.
is_required (bool): Indicates whether the input is required. Defaults to True.
description (str): The description of this input.
"""
name: str
variable_type: enums.WorkflowNodeInputType
variable_source: str = dataclasses.field(default=None)
source_prop: str = dataclasses.field(default=None)
is_required: bool = dataclasses.field(default=True)
description: str = dataclasses.field(default=None)

def __post_init__(self):
if self.variable_type == enums.WorkflowNodeInputType.IGNORE and self.is_required:
@@ -81,6 +83,7 @@ def to_dict(self):
'variable_source': self.variable_source,
'source_prop': self.source_prop or self.name,
'is_required': self.is_required,
'description': self.description
}

@classmethod
@@ -94,6 +97,7 @@ def from_dict(cls, mapping: dict):
variable_source=mapping.get('variable_source'),
source_prop=mapping.get('source_prop') or mapping['name'] if mapping.get('variable_source') else None,
is_required=mapping.get('is_required', True),
description=mapping.get('description')
)


@@ -172,9 +176,11 @@ class WorkflowNodeOutputMapping(ApiClass):
Args:
name (str): The name of the output.
variable_type (Union[WorkflowNodeOutputType, str]): The type of the output in the form of an enum or a string.
description (str): The description of this output.
"""
name: str
variable_type: Union[enums.WorkflowNodeOutputType, str] = dataclasses.field(default=enums.WorkflowNodeOutputType.ANY)
description: str = dataclasses.field(default=None)

def __post_init__(self):
if isinstance(self.variable_type, str):
@@ -183,7 +189,8 @@ def __post_init__(self):
def to_dict(self):
return {
'name': self.name,
'variable_type': self.variable_type.value
'variable_type': self.variable_type.value,
'description': self.description
}

@classmethod
@@ -194,7 +201,8 @@ def from_dict(cls, mapping: dict):
raise ValueError('output_mapping', f'Invalid enum argument {variable_type}. Provided argument should be of enum type WorkflowNodeOutputType.')
return cls(
name=mapping['name'],
variable_type=enums.WorkflowNodeOutputType(variable_type)
variable_type=enums.WorkflowNodeOutputType(variable_type),
description=mapping.get('description')
)


@@ -392,7 +400,7 @@ def from_template(cls, template_name: str, name: str, configs: dict = None, inpu
if isinstance(input_mappings, List) and all(isinstance(input, WorkflowNodeInputMapping) for input in input_mappings):
instance_input_mappings = input_mappings
elif isinstance(input_mappings, Dict) and all(isinstance(key, str) and isinstance(value, WorkflowNodeInputMapping) for key, value in input_mappings.items()):
instance_input_mappings = [WorkflowNodeInputMapping(name=arg, variable_type=mapping.variable_type, variable_source=mapping.variable_source, source_prop=mapping.source_prop, is_required=mapping.is_required) for arg, mapping in input_mappings]
instance_input_mappings = [WorkflowNodeInputMapping(name=arg, variable_type=mapping.variable_type, variable_source=mapping.variable_source, source_prop=mapping.source_prop, is_required=mapping.is_required, description=mapping.description) for arg, mapping in input_mappings]
elif input_mappings is None:
instance_input_mappings = []
else:
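
Taken together, the ai_agents.py changes add an optional description to both mapping classes and carry it through to_dict, from_dict, and from_template. A minimal sketch of filling it in (the surrounding workflow-node definition is assumed and not part of this commit):

from abacusai.api_class.ai_agents import WorkflowNodeInputMapping, WorkflowNodeOutputMapping
from abacusai.api_class import enums

# Input supplied by the end user, now documented via the new description field.
query_input = WorkflowNodeInputMapping(
    name='query',
    variable_type=enums.WorkflowNodeInputType.USER_INPUT,
    is_required=True,
    description='Free-form question typed by the end user.',
)

# Output of the node, also carrying a human-readable description.
answer_output = WorkflowNodeOutputMapping(
    name='answer',
    variable_type=enums.WorkflowNodeOutputType.ANY,
    description='Answer string returned to the caller.',
)

# Both serializations now include the description key, per the updated to_dict() methods.
print(query_input.to_dict()['description'])
print(answer_output.to_dict()['description'])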
2 changes: 2 additions & 0 deletions abacusai/api_class/enums.py
@@ -473,13 +473,15 @@ class LLMName(ApiEnum):
LLAMA3_1_405B = 'LLAMA3_1_405B'
LLAMA3_1_70B = 'LLAMA3_1_70B'
LLAMA3_1_8B = 'LLAMA3_1_8B'
LLAMA3_3_70B = 'LLAMA3_3_70B'
LLAMA3_LARGE_CHAT = 'LLAMA3_LARGE_CHAT'
CLAUDE_V3_OPUS = 'CLAUDE_V3_OPUS'
CLAUDE_V3_SONNET = 'CLAUDE_V3_SONNET'
CLAUDE_V3_HAIKU = 'CLAUDE_V3_HAIKU'
CLAUDE_V3_5_SONNET = 'CLAUDE_V3_5_SONNET'
CLAUDE_V3_5_HAIKU = 'CLAUDE_V3_5_HAIKU'
GEMINI_1_5_PRO = 'GEMINI_1_5_PRO'
GEMINI_2_FLASH = 'GEMINI_2_FLASH'
ABACUS_SMAUG3 = 'ABACUS_SMAUG3'
ABACUS_DRACARYS = 'ABACUS_DRACARYS'
QWEN_2_5_32B = 'QWEN_2_5_32B'
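
The two new model names slot into any API that accepts an LLM name string. An illustrative call (evaluate_prompt is the SDK's existing generic completion helper; its availability and response shape are assumed here, not shown in this diff):

from abacusai import ApiClient
from abacusai.api_class.enums import LLMName

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

# Reference one of the newly added members instead of a hard-coded string.
response = client.evaluate_prompt(
    prompt='Summarize the 1.4.23 release in one sentence.',
    llm_name=LLMName.LLAMA3_3_70B.value,
)
print(response.content)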
33 changes: 31 additions & 2 deletions abacusai/api_client_utils.py
@@ -7,6 +7,8 @@
from itertools import groupby
from typing import IO, Callable, List

import pandas as pd


INVALID_PANDAS_COLUMN_NAME_CHARACTERS = '[^A-Za-z0-9_]'

@@ -44,6 +46,34 @@ def avro_to_pandas_dtype(avro_type):
return avro_pandas_dtypes.get(avro_type, 'object')


def _get_spark_incompatible_columns(df):
# Spark-compatible pandas dtypes
spark_compatible_pd_dtypes = {
'int8', 'int16', 'int32', 'int64',
'float32', 'float64',
'bool', # Standard boolean type
'boolean', # Nullable BooleanDtype
'object', # Assuming they contain strings
'string', # StringDtype introduced in pandas 1.0
'datetime64[ns]',
'timedelta64[ns]'
}

incompatible_columns = []

for col in df.columns:
dtype = df[col].dtype
dtype_str = str(dtype)
if pd.api.types.is_extension_array_dtype(dtype):
dtype_name = dtype.name.lower()
if dtype_name not in spark_compatible_pd_dtypes:
incompatible_columns.append((col, dtype_name))
elif dtype_str not in spark_compatible_pd_dtypes:
incompatible_columns.append((col, dtype_str))

return incompatible_columns, spark_compatible_pd_dtypes


def get_non_nullable_type(types):
non_nullable_types = [
avro_type for avro_type in types if avro_type != 'null']
@@ -480,8 +510,7 @@ def get_pandas_pages_df(cls, df, feature_group_version: str, doc_id_column: str,
json_pages_list = [{**(page or {}), doc_id_column: content_hash_to_doc_id[content_hash]}
for content_hash, page in pages_list]
pages_df_with_config = pd.DataFrame(json_pages_list)
pages_df_with_config = pages_df_with_config.replace(
{pd.np.nan: None})
pages_df_with_config = pages_df_with_config.replace({np.nan: None})

df = df.drop_duplicates([doc_id_column])
group_by_archive = df.groupby(
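
The new _get_spark_incompatible_columns helper above backs the Spark-compatibility check added to _validate_pandas_df in client.py (next file). A small sketch of what it reports, using a deliberately incompatible category column (the helper is private, so this is purely illustrative):

import pandas as pd

from abacusai.api_client_utils import _get_spark_incompatible_columns

df = pd.DataFrame({
    'user_id': pd.Series([1, 2, 3], dtype='int64'),           # supported
    'score': pd.Series([0.1, 0.2, 0.3], dtype='float32'),     # supported
    'segment': pd.Series(['a', 'b', 'a'], dtype='category'),  # not in the supported set
})

incompatible, supported_dtypes = _get_spark_incompatible_columns(df)
print(incompatible)  # [('segment', 'category')]

# Casting to a supported dtype clears the ValueError that _validate_pandas_df would raise.
df['segment'] = df['segment'].astype('string')
print(_get_spark_incompatible_columns(df)[0])  # []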
41 changes: 33 additions & 8 deletions abacusai/client.py
@@ -53,7 +53,8 @@
from .api_class.segments import ResponseSection, Segment
from .api_client_utils import (
INVALID_PANDAS_COLUMN_NAME_CHARACTERS, StreamingHandler, StreamType,
clean_column_name, get_object_from_context, run
_get_spark_incompatible_columns, clean_column_name,
get_object_from_context, run
)
from .api_endpoint import ApiEndpoint
from .api_key import ApiKey
Expand Down Expand Up @@ -177,6 +178,7 @@
from .use_case import UseCase
from .use_case_requirements import UseCaseRequirements
from .user import User
from .web_page_response import WebPageResponse
from .web_search_response import WebSearchResponse
from .webhook import Webhook
from .workflow_node_template import WorkflowNodeTemplate
@@ -633,7 +635,7 @@ class BaseApiClient:
client_options (ClientOptions): Optional API client configurations
skip_version_check (bool): If true, will skip checking the server's current API version on initializing the client
"""
client_version = '1.4.22'
client_version = '1.4.23'

def __init__(self, api_key: str = None, server: str = None, client_options: ClientOptions = None, skip_version_check: bool = False, include_tb: bool = False):
self.api_key = api_key
@@ -941,6 +943,17 @@ def _validate_pandas_df(self, df, clean_column_names: bool):
if bad_column_names:
raise ValueError(
f'The dataframe\'s Column(s): {bad_column_names} contain illegal characters. Please rename the columns such that they only contain alphanumeric characters and underscores, and must start with an alpha character.')

incompatible_columns, compatible_pd_dtypes = _get_spark_incompatible_columns(
df)
if incompatible_columns:
error_message = "The following columns have incompatible data types:\n"
for col_name, col_dtype in incompatible_columns:
error_message += f" - '{col_name}' (type: {col_dtype})\n"
error_message += f"Supported data types are: {', '.join(sorted(compatible_pd_dtypes))}\n"
error_message += "Please cast these columns to a supported data type and try again.\n"
raise ValueError(error_message)

return df

def _upload_from_pandas(self, upload, df, clean_column_names=False) -> Dataset:
@@ -4586,15 +4599,16 @@ def remove_user_from_organization(self, email: str):
email (str): The email address of the user to remove from the organization."""
return self._call_api('removeUserFromOrganization', 'DELETE', query_params={'email': email})

def send_email(self, email: str, subject: str, body: str, is_html: bool = False):
def send_email(self, email: str, subject: str, body: str, is_html: bool = False, attachments: None = None):
"""Send an email to the specified email address with provided subject and contents.

Args:
email (str): The email address to send the email to.
subject (str): The subject of the email.
body (str): The body of the email.
is_html (bool): Whether the body is html or not."""
return self._call_api('sendEmail', 'POST', query_params={}, body={'email': email, 'subject': subject, 'body': body, 'isHtml': is_html})
is_html (bool): Whether the body is html or not.
attachments (None): A dictionary where the key is the filename (including the file extension), and the value is either a file-like object (e.g., an open file in binary mode) or raw file data (e.g., bytes)."""
return self._call_api('sendEmail', 'POST', query_params={}, data={'email': json.dumps(email) if (email is not None and not isinstance(email, str)) else email, 'subject': json.dumps(subject) if (subject is not None and not isinstance(subject, str)) else subject, 'body': json.dumps(body) if (body is not None and not isinstance(body, str)) else body, 'isHtml': json.dumps(is_html) if (is_html is not None and not isinstance(is_html, str)) else is_html}, files=attachments)
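
An illustrative call using the new attachments parameter, following the filename-to-file-object format described in the docstring (recipient, subject, and file name are placeholders):

from abacusai import ApiClient

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

with open('release_notes.pdf', 'rb') as notes:
    client.send_email(
        email='user@example.com',
        subject='v1.4.23 release notes',
        body='<p>See the attached notes.</p>',
        is_html=True,
        attachments={'release_notes.pdf': notes},  # filename -> file-like object or raw bytes
    )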

def create_deployment_webhook(self, deployment_id: str, endpoint: str, webhook_event_type: str, payload_template: dict = None) -> Webhook:
"""Create a webhook attached to a given deployment ID.
@@ -6002,7 +6016,7 @@ def extract_document_data(self, document: io.TextIOBase = None, doc_id: str = No

Returns:
DocumentData: The extracted document data."""
return self._proxy_request('ExtractDocumentData', 'POST', query_params={}, data={'docId': doc_id, 'documentProcessingConfig': json.dumps(document_processing_config), 'startPage': start_page, 'endPage': end_page, 'returnExtractedPageText': return_extracted_page_text}, files={'document': document}, parse_type=DocumentData)
return self._proxy_request('ExtractDocumentData', 'POST', query_params={}, data={'docId': doc_id, 'documentProcessingConfig': json.dumps(document_processing_config.to_dict()) if hasattr(document_processing_config, 'to_dict') else json.dumps(document_processing_config), 'startPage': start_page, 'endPage': end_page, 'returnExtractedPageText': return_extracted_page_text}, files={'document': document}, parse_type=DocumentData)

def get_training_config_options(self, project_id: str, feature_group_ids: List = None, for_retrain: bool = False, current_training_config: Union[dict, TrainingConfig] = None) -> List[TrainingConfigOptions]:
"""Retrieves the full initial description of the model training configuration options available for the specified project. The configuration options available are determined by the use case associated with the specified project. Refer to the [Use Case Documentation]({USE_CASES_URL}) for more information on use cases and use case-specific configuration options.
@@ -7806,7 +7820,7 @@ def upsert_data(self, feature_group_id: str, data: dict, streaming_token: str =

Returns:
FeatureGroupRow: The feature group row that was upserted."""
return self._proxy_request('upsertData', 'POST', query_params={}, data={'featureGroupId': feature_group_id, 'data': json.dumps(data), 'streamingToken': streaming_token}, files=blobs, parse_type=FeatureGroupRow, is_sync=True)
return self._proxy_request('upsertData', 'POST', query_params={}, data={'featureGroupId': feature_group_id, 'data': json.dumps(data.to_dict()) if hasattr(data, 'to_dict') else json.dumps(data), 'streamingToken': streaming_token}, files=blobs, parse_type=FeatureGroupRow, is_sync=True)
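
upsert_data now serializes either a plain dict or any object exposing to_dict(); a short sketch with a plain dict (the feature group ID and column names are placeholders):

from abacusai import ApiClient

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

row = client.upsert_data(
    feature_group_id='FEATURE_GROUP_ID',          # placeholder
    data={'ticket_id': '123', 'status': 'open'},  # plain dicts keep working unchanged
)
print(row.to_dict())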

def delete_data(self, feature_group_id: str, primary_key: str):
"""Deletes a row from the feature group given the primary key
@@ -8708,6 +8722,17 @@ def search_web_for_llm(self, queries: List, search_providers: List = None, max_r
WebSearchResponse: Results of running the search queries."""
return self._proxy_request('SearchWebForLlm', 'POST', query_params={}, body={'queries': queries, 'searchProviders': search_providers, 'maxResults': max_results, 'safe': safe, 'fetchContent': fetch_content, 'maxPageTokens': max_page_tokens, 'convertToMarkdown': convert_to_markdown}, parse_type=WebSearchResponse)

def fetch_web_page(self, url: str, convert_to_markdown: bool = True) -> WebPageResponse:
"""Scrapes the content of a web page and returns it as a string.

Args:
url (str): The url of the web page to scrape.
convert_to_markdown (bool): Whether content should be converted to markdown.

Returns:
WebPageResponse: The content of the web page."""
return self._proxy_request('FetchWebPage', 'POST', query_params={}, body={'url': url, 'convertToMarkdown': convert_to_markdown}, parse_type=WebPageResponse)
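
A short usage sketch for the new fetch_web_page endpoint (the URL is a placeholder; no WebPageResponse fields beyond what the SDK returns are assumed):

from abacusai import ApiClient

client = ApiClient(api_key='YOUR_API_KEY')  # placeholder key

page = client.fetch_web_page('https://example.com', convert_to_markdown=True)
print(page.to_dict())  # inspect the returned WebPageResponse fields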

def construct_agent_conversation_messages_for_llm(self, deployment_conversation_id: str = None, external_session_id: str = None, include_document_contents: bool = True) -> AgentConversation:
"""Returns conversation history in a format for LLM calls.

@@ -8840,4 +8865,4 @@ def get_relevant_snippets(self, doc_ids: List = None, blobs: io.TextIOBase = Non

Returns:
list[DocumentRetrieverLookupResult]: The snippets found from the documents."""
return self._proxy_request('GetRelevantSnippets', 'POST', query_params={}, data={'docIds': doc_ids, 'query': query, 'documentRetrieverConfig': json.dumps(document_retriever_config), 'honorSentenceBoundary': honor_sentence_boundary, 'numRetrievalMarginWords': num_retrieval_margin_words, 'maxWordsPerSnippet': max_words_per_snippet, 'maxSnippetsPerDocument': max_snippets_per_document, 'startWordIndex': start_word_index, 'endWordIndex': end_word_index, 'includingBoundingBoxes': including_bounding_boxes, 'text': text}, files=blobs, parse_type=DocumentRetrieverLookupResult)
return self._proxy_request('GetRelevantSnippets', 'POST', query_params={}, data={'docIds': doc_ids, 'query': query, 'documentRetrieverConfig': json.dumps(document_retriever_config.to_dict()) if hasattr(document_retriever_config, 'to_dict') else json.dumps(document_retriever_config), 'honorSentenceBoundary': honor_sentence_boundary, 'numRetrievalMarginWords': num_retrieval_margin_words, 'maxWordsPerSnippet': max_words_per_snippet, 'maxSnippetsPerDocument': max_snippets_per_document, 'startWordIndex': start_word_index, 'endWordIndex': end_word_index, 'includingBoundingBoxes': including_bounding_boxes, 'text': text}, files=blobs, parse_type=DocumentRetrieverLookupResult)
36 changes: 36 additions & 0 deletions abacusai/code_bot.py
@@ -0,0 +1,36 @@
from .return_class import AbstractApiClass


class CodeBot(AbstractApiClass):
"""
A bot option for CodeLLM
Args:
client (ApiClient): An authenticated API Client instance
llmName (str): The name of the LLM.
name (str): The name of the bot.
"""

def __init__(self, client, llmName=None, name=None):
super().__init__(client, None)
self.llm_name = llmName
self.name = name
self.deprecated_keys = {}

def __repr__(self):
repr_dict = {f'llm_name': repr(
self.llm_name), f'name': repr(self.name)}
class_name = "CodeBot"
repr_str = ',\n '.join([f'{key}={value}' for key, value in repr_dict.items(
) if getattr(self, key, None) is not None and key not in self.deprecated_keys])
return f"{class_name}({repr_str})"

def to_dict(self):
"""
Get a dict representation of the parameters in this class
Returns:
dict: The dict value representation of the class parameters
"""
resp = {'llm_name': self.llm_name, 'name': self.name}
return {key: value for key, value in resp.items() if value is not None and key not in self.deprecated_keys}