Commit 19c5599
Version 1.3.2
harshit-jain-git committed May 24, 2024
1 parent 59b4176
Showing 481 changed files with 134,152 additions and 60,688 deletions.
2 changes: 1 addition & 1 deletion abacusai/__init__.py
@@ -4,4 +4,4 @@
from .streaming_client import StreamingClient


__version__ = "1.2.5"
__version__ = "1.3.2"
2 changes: 1 addition & 1 deletion abacusai/api_class/ai_agents.py
@@ -28,7 +28,7 @@ class WorkflowNodeInputSchema(ApiClass):
A react-jsonschema-form conformant schema for workflow node input.
Args:
-json_schema (dict): The json schema for the input conformant to react-jsonschema-form specification. Must define keys like "title", "type" and "properties".
+json_schema (dict): The json schema for the input conformant to react-jsonschema-form specification. Must define keys like "title", "type" and "properties". Supported elements - Checkbox, Radio Button, Dropdown, Textarea, Number, Date, File Upload. Not supported - Nested elements, arrays and other complex types.
ui_schema (dict): The ui schema for the input conformant to react-jsonschema-form specification.
"""
json_schema: dict
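For context, a minimal sketch of an input schema that stays within the supported elements listed above; the field names, titles and widget choice are illustrative, not part of this commit:

from abacusai.api_class.ai_agents import WorkflowNodeInputSchema

# Illustrative json_schema limited to supported element types:
# checkbox (boolean), dropdown (enum), textarea (string), number and date.
input_schema = WorkflowNodeInputSchema(
    json_schema={
        "title": "Ticket Triage Input",
        "type": "object",
        "properties": {
            "urgent": {"type": "boolean", "title": "Urgent"},
            "queue": {"type": "string", "title": "Queue", "enum": ["billing", "support", "sales"]},
            "details": {"type": "string", "title": "Details"},
            "priority": {"type": "integer", "title": "Priority"},
            "due_date": {"type": "string", "format": "date", "title": "Due Date"},
        },
    },
    ui_schema={"details": {"ui:widget": "textarea"}},
)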
43 changes: 43 additions & 0 deletions abacusai/api_class/connectors.py
@@ -0,0 +1,43 @@
import dataclasses

from . import enums
from .abstract import _ApiClassFactory
from .dataset import DatasetConfig


@dataclasses.dataclass
class StreamingConnectorDatasetConfig(DatasetConfig):
"""
An abstract class for dataset configs specific to streaming connectors.
Args:
streaming_connector_type (StreamingConnectorType): The type of streaming connector
"""
streaming_connector_type: enums.StreamingConnectorType = dataclasses.field(default=None, repr=False, init=False)

@classmethod
def _get_builder(cls):
return _StreamingConnectorDatasetConfigFactory


@dataclasses.dataclass
class KafkaDatasetConfig(StreamingConnectorDatasetConfig):
"""
Dataset config for Kafka Streaming Connector
Args:
topic (str): The kafka topic to consume
"""
topic: str

def __post_init__(self):
self.streaming_connector_type = enums.StreamingConnectorType.KAFKA


@dataclasses.dataclass
class _StreamingConnectorDatasetConfigFactory(_ApiClassFactory):
config_abstract_class = StreamingConnectorDatasetConfig
config_class_key = 'streaming_connector_type'
config_class_map = {
enums.StreamingConnectorType.KAFKA: KafkaDatasetConfig,
}
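A short sketch of how the new Kafka config might be constructed; the topic name is a placeholder, and __post_init__ fills in the connector type so callers never pass it themselves:

from abacusai.api_class.connectors import KafkaDatasetConfig
from abacusai.api_class.enums import StreamingConnectorType

config = KafkaDatasetConfig(topic="clickstream-events")  # placeholder topic name
# The factory key is set automatically by __post_init__.
assert config.streaming_connector_type == StreamingConnectorType.KAFKA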
11 changes: 11 additions & 0 deletions abacusai/api_class/dataset.py
@@ -4,6 +4,17 @@
from .enums import OcrMode


@dataclasses.dataclass
class DatasetConfig(ApiClass):
"""
An abstract class for dataset configs
Args:
is_documentset (bool): Whether the dataset is a document set
"""
is_documentset: bool = dataclasses.field(default=None)


@dataclasses.dataclass
class ParsingConfig(ApiClass):
"""
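This new base ties the connector-specific config families together; a one-line sketch of the relationship, using the KafkaDatasetConfig added above:

from abacusai.api_class.connectors import KafkaDatasetConfig
from abacusai.api_class.dataset import DatasetConfig

# Streaming and application connector configs now share this base, so
# is_documentset is defined in a single place.
assert issubclass(KafkaDatasetConfig, DatasetConfig)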
32 changes: 15 additions & 17 deletions abacusai/api_class/dataset_application_connector.py
@@ -1,28 +1,27 @@
import dataclasses

from . import enums
-from .abstract import ApiClass, _ApiClassFactory
+from .abstract import _ApiClassFactory
+from .dataset import DatasetConfig


@dataclasses.dataclass
-class DatasetConfig(ApiClass):
+class ApplicationConnectorDatasetConfig(DatasetConfig):
"""
An abstract class for dataset configs specific to application connectors.
Args:
application_connector_type(enums.ApplicationConnectorType): The type of application connector
-is_documentset (bool): Whether the dataset is a document set
"""
application_connector_type: enums.ApplicationConnectorType = dataclasses.field(default=None, repr=False, init=False)
-is_documentset: bool = dataclasses.field(default=None)

@classmethod
def _get_builder(cls):
-return _DatasetConfigFactory
+return _ApplicationConnectorDatasetConfigFactory


@dataclasses.dataclass
-class ConfluenceDatasetConfig(DatasetConfig):
+class ConfluenceDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Confluence Application Connector
Args:
@@ -42,7 +41,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class GoogleAnalyticsDatasetConfig(DatasetConfig):
+class GoogleAnalyticsDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Google Analytics Application Connector
@@ -60,7 +59,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class GoogleDriveDatasetConfig(DatasetConfig):
+class GoogleDriveDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Google Drive Application Connector
@@ -80,7 +79,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class JiraDatasetConfig(DatasetConfig):
+class JiraDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Jira Application Connector
@@ -100,7 +99,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class OneDriveDatasetConfig(DatasetConfig):
+class OneDriveDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for OneDrive Application Connector
@@ -120,13 +119,12 @@ def __post_init__(self):


@dataclasses.dataclass
-class SharepointDatasetConfig(DatasetConfig):
+class SharepointDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Sharepoint Application Connector
Args:
location (str): The regex location of the files to fetch
-is_documentset (bool): Whether the dataset is a document set
csv_delimiter (str): If the file format is CSV, use a specific csv delimiter
extract_bounding_boxes (bool): Signifies whether to extract bounding boxes out of the documents. Only valid if is_documentset if True
merge_file_schemas (bool): Signifies if the merge file schema policy is enabled. Not applicable if is_documentset is True
@@ -141,7 +139,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class ZendeskDatasetConfig(DatasetConfig):
+class ZendeskDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Zendesk Application Connector
@@ -155,7 +153,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class AbacusUsageMetricsDatasetConfig(DatasetConfig):
+class AbacusUsageMetricsDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Abacus Usage Metrics Application Connector
@@ -172,7 +170,7 @@ def __post_init__(self):


@dataclasses.dataclass
-class FreshserviceDatasetConfig(DatasetConfig):
+class FreshserviceDatasetConfig(ApplicationConnectorDatasetConfig):
"""
Dataset config for Freshservice Application Connector
"""
@@ -182,8 +180,8 @@ def __post_init__(self):


@dataclasses.dataclass
-class _DatasetConfigFactory(_ApiClassFactory):
-config_abstract_class = DatasetConfig
+class _ApplicationConnectorDatasetConfigFactory(_ApiClassFactory):
+config_abstract_class = ApplicationConnectorDatasetConfig
config_class_key = 'application_connector_type'
config_class_map = {
enums.ApplicationConnectorType.CONFLUENCE: ConfluenceDatasetConfig,
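A hedged end-to-end sketch of the renamed configs in use: the location pattern, connector ID and API key are placeholders, and the create_dataset_from_application_connector call is assumed to accept a dataset_config keyword as in the current client:

from abacusai import ApiClient
from abacusai.api_class.dataset_application_connector import SharepointDatasetConfig

client = ApiClient("YOUR_API_KEY")  # placeholder API key

dataset_config = SharepointDatasetConfig(
    location="Shared Documents/.*",  # placeholder regex location
    is_documentset=True,             # inherited from the shared DatasetConfig base
    extract_bounding_boxes=True,
)

dataset = client.create_dataset_from_application_connector(
    table_name="sharepoint_documents",
    application_connector_id="0123456789",  # placeholder connector ID
    dataset_config=dataset_config,
)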
8 changes: 2 additions & 6 deletions abacusai/api_class/document_retriever.py
@@ -7,7 +7,7 @@
@dataclasses.dataclass
class VectorStoreConfig(ApiClass):
"""
-Configs for vector store indexing.
+Config for indexing options of a document retriever. Default values of optional arguments are heuristically selected by the Abacus.AI platform based on the underlying data.
Args:
chunk_size (int): The size of text chunks in the vector store.
@@ -25,8 +25,4 @@ class VectorStoreConfig(ApiClass):
prune_vectors: bool = dataclasses.field(default=None)


-@dataclasses.dataclass
-class DocumentRetrieverConfig(VectorStoreConfig):
-"""
-Configs for document retriever. If any configuration value is not explicitly provided, Abacus.AI will automatically infer default values based on the data.
-"""
+DocumentRetrieverConfig = VectorStoreConfig
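Since DocumentRetrieverConfig is now a plain alias rather than a subclass, both names build the identical config; a small sketch with illustrative values:

from abacusai.api_class.document_retriever import DocumentRetrieverConfig, VectorStoreConfig

assert DocumentRetrieverConfig is VectorStoreConfig  # alias, no longer a subclass

config = VectorStoreConfig(chunk_size=512, prune_vectors=True)  # illustrative values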
15 changes: 6 additions & 9 deletions abacusai/api_class/enums.py
@@ -362,7 +362,11 @@ class ApplicationConnectorType(ApiEnum):
ABACUSUSAGEMETRICS = 'ABACUSUSAGEMETRICS'
MICROSOFTAUTH = 'MICROSOFTAUTH'
FRESHSERVICE = 'FRESHSERVICE'
-ZENDESKSUNSHINEMESSAGING = 'zendesksunshinemessaging'
+ZENDESKSUNSHINEMESSAGING = 'ZENDESKSUNSHINEMESSAGING'


+class StreamingConnectorType(ApiEnum):
+KAFKA = 'KAFKA'


class PythonFunctionArgumentType(ApiEnum):
@@ -428,7 +432,7 @@ class LLMName(ApiEnum):
GEMINI_1_5_PRO = 'GEMINI_1_5_PRO'
MIXTRAL_CHAT = 'MIXTRAL_CHAT'
MISTRAL_MEDIUM = 'MISTRAL_MEDIUM'
-ABACUS_SMAUG2 = 'ABACUS_SMAUG2'
+ABACUS_SMAUG3 = 'ABACUS_SMAUG3'


class MonitorAlertType(ApiEnum):
@@ -532,13 +536,6 @@ class DataType(ApiEnum):
STRUCT = 'struct'
NULL = 'null'

-@classmethod
-def from_str(cls, value):
-if not value:
-return None
-default_map = {val.value: val for val in DataType}
-return default_map[value.lower()]


class AgentInterface(ApiEnum):
# Duplicated in reainternal.enums, both should be kept in sync
68 changes: 34 additions & 34 deletions abacusai/api_class/model.py
@@ -438,18 +438,19 @@ class ChatLLMTrainingConfig(TrainingConfig):
Training config for the CHAT_LLM problem type
Args:
-document_retrievers (List[str]): List of document retriever names to use for the feature stores this model was trained with.
-num_completion_tokens (int): Default for maximum number of tokens for chat answers. Reducing this will get faster responses which are more succinct
-temperature (float): The generative LLM temperature
+document_retrievers (List[str]): List of names of document retrievers to use as vector stores of information for RAG responses.
+num_completion_tokens (int): Default for maximum number of tokens for chat answers. Reducing this will get faster responses which are more succinct.
+temperature (float): The generative LLM temperature.
retrieval_columns (list): Include the metadata column values in the retrieved search results.
filter_columns (list): Allow users to filter the document retrievers on these metadata columns.
-include_general_knowledge (bool): Allow the LLM to rely not just on search results, but to fall back on general knowledge.
+include_general_knowledge (bool): Allow the LLM to rely not just on RAG search results, but to fall back on general knowledge. Disabled by default.
+enable_web_search (bool) : Allow the LLM to use Web Search Engines to retrieve information for better results.
behavior_instructions (str): Customize the overall role instructions for the LLM.
-response_instructions (str): Customize instructions for what the LLM responses should look like.
-enable_llm_rewrite (bool): Enable LLM rewrite for the ChatLLM. If None, LLM rewrite will happen automatically. Defaults to False.
+response_instructions (str): Customized instructions for how the LLM should respond.
+enable_llm_rewrite (bool): If enabled, an LLM will rewrite the RAG queries sent to document retriever. Disabled by default.
column_filtering_instructions (str): Instructions for a LLM call to automatically generate filter expressions on document metadata to retrieve relevant documents for the conversation.
keyword_requirement_instructions (str): Instructions for a LLM call to automatically generate keyword requirements to retrieve relevant documents for the conversation.
-query_rewrite_instructions (str): Instructions for a LLM call to rewrite a search query.
+query_rewrite_instructions (str): Special instructions for the LLM which rewrites the RAG query.
max_search_results (int): Maximum number of search results in the retrieval augmentation step. If we know that the questions are likely to have snippets which are easily matched in the documents, then a lower number will help with accuracy.
data_feature_group_ids: (List[str]): List of feature group IDs to use to possibly query for the ChatLLM. The created ChatLLM is commonly referred to as DataLLM.
data_prompt_context (str): Prompt context for the data feature group IDs.
@@ -458,33 +459,32 @@
search_score_cutoff (float): Minimum search score to consider a document as a valid search result.
database_connector_id (str): Database connector ID to use for the ChatLLM.
database_connector_tables (List[str]): List of tables to use from the database connector for the ChatLLM.
-enable_code_execution (bool): Enable code execution in the ChatLLM.
-metadata_columns (list): DEPRECATED. Include the metadata column values in the retrieved search results.
-lookup_rewrite_instructions (str): DEPRECATED. Instructions for a LLM call to rewrite a search query.
-"""
-document_retrievers: List[str] = None
-num_completion_tokens: int = None
-temperature: float = None
-retrieval_columns: list = None
-filter_columns: list = None
-include_general_knowledge: bool = None
-behavior_instructions: str = None
-response_instructions: str = None
-enable_llm_rewrite: bool = False
-column_filtering_instructions: str = None
-keyword_requirement_instructions: str = None
-query_rewrite_instructions: str = None
-max_search_results: int = None
-data_feature_group_ids: List[str] = None
-data_prompt_context: str = None
-hide_generated_sql: bool = None
-disable_data_summarization: bool = None
-search_score_cutoff: float = None
-database_connector_id: str = None
-database_connector_tables: List[str] = None
-enable_code_execution: bool = None
-metadata_columns: list = None
-lookup_rewrite_instructions: str = None
+enable_code_execution (bool): Enable python code execution in the ChatLLM. This equips the LLM with a python kernel in which all its code is executed.
+"""
+document_retrievers: List[str] = dataclasses.field(default=None)
+num_completion_tokens: int = dataclasses.field(default=None)
+temperature: float = dataclasses.field(default=None)
+retrieval_columns: list = dataclasses.field(default=None)
+filter_columns: list = dataclasses.field(default=None)
+include_general_knowledge: bool = dataclasses.field(default=None)
+enable_web_search: bool = dataclasses.field(default=None)
+behavior_instructions: str = dataclasses.field(default=None)
+response_instructions: str = dataclasses.field(default=None)
+enable_llm_rewrite: bool = dataclasses.field(default=None)
+column_filtering_instructions: str = dataclasses.field(default=None)
+keyword_requirement_instructions: str = dataclasses.field(default=None)
+query_rewrite_instructions: str = dataclasses.field(default=None)
+max_search_results: int = dataclasses.field(default=None)
+data_feature_group_ids: List[str] = dataclasses.field(default=None)
+data_prompt_context: str = dataclasses.field(default=None)
+hide_generated_sql: bool = dataclasses.field(default=None)
+disable_data_summarization: bool = dataclasses.field(default=None)
+search_score_cutoff: float = dataclasses.field(default=None)
+database_connector_id: str = dataclasses.field(default=None)
+database_connector_tables: List[str] = dataclasses.field(default=None)
+enable_code_execution: bool = dataclasses.field(default=None)
+metadata_columns: list = dataclasses.field(default=None, metadata={'deprecated': True})
+lookup_rewrite_instructions: str = dataclasses.field(default=None, metadata={'deprecated': True})

def __post_init__(self):
self.problem_type = enums.ProblemType.CHAT_LLM
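A sketch of a ChatLLM training config exercising the reworded and newly added options; the retriever name and instruction strings are placeholders:

from abacusai.api_class.model import ChatLLMTrainingConfig

training_config = ChatLLMTrainingConfig(
    document_retrievers=["support_articles_retriever"],  # placeholder retriever name
    num_completion_tokens=1024,
    temperature=0.0,
    enable_web_search=True,   # newly added option
    enable_llm_rewrite=True,  # default changed from False to None in this release
    behavior_instructions="You are a concise support assistant.",
    response_instructions="Answer in short bullet points and cite the source document.",
)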
2 changes: 1 addition & 1 deletion abacusai/batch_prediction.py
@@ -84,7 +84,7 @@ def __init__(self, client, batchPredictionId=None, createdAt=None, name=None, de
BatchPredictionArgs, globalPredictionArgs)
self.batch_prediction_args = client._build_class(getattr(
api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
-self.deprecated_keys = {'explanations', 'global_prediction_args'}
+self.deprecated_keys = {'global_prediction_args', 'explanations'}

def __repr__(self):
repr_dict = {f'batch_prediction_id': repr(self.batch_prediction_id), f'created_at': repr(self.created_at), f'name': repr(self.name), f'deployment_id': repr(self.deployment_id), f'file_connector_output_location': repr(self.file_connector_output_location), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'output_feature_group_id': repr(self.output_feature_group_id), f'feature_group_table_name': repr(self.feature_group_table_name), f'output_feature_group_table_name': repr(self.output_feature_group_table_name), f'summary_feature_group_table_name': repr(self.summary_feature_group_table_name), f'csv_input_prefix': repr(
2 changes: 1 addition & 1 deletion abacusai/batch_prediction_version.py
@@ -100,7 +100,7 @@ def __init__(self, client, batchPredictionVersion=None, batchPredictionId=None,
BatchPredictionArgs, globalPredictionArgs)
self.batch_prediction_args = client._build_class(getattr(
api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
-self.deprecated_keys = {'explanations', 'global_prediction_args'}
+self.deprecated_keys = {'global_prediction_args', 'explanations'}

def __repr__(self):
repr_dict = {f'batch_prediction_version': repr(self.batch_prediction_version), f'batch_prediction_id': repr(self.batch_prediction_id), f'status': repr(self.status), f'drift_monitor_status': repr(self.drift_monitor_status), f'deployment_id': repr(self.deployment_id), f'model_id': repr(self.model_id), f'model_version': repr(self.model_version), f'predictions_started_at': repr(self.predictions_started_at), f'predictions_completed_at': repr(self.predictions_completed_at), f'database_output_error': repr(self.database_output_error), f'total_predictions': repr(self.total_predictions), f'failed_predictions': repr(self.failed_predictions), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_connector_output_location': repr(self.file_connector_output_location), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'error': repr(self.error), f'drift_monitor_error': repr(self.drift_monitor_error), f'monitor_warnings': repr(self.monitor_warnings), f'csv_input_prefix': repr(