
Commit

Version 1.1.6
Austin Zielman committed Feb 29, 2024
1 parent 7fa14bb commit bf66397
Showing 58 changed files with 1,364 additions and 582 deletions.
2 changes: 1 addition & 1 deletion abacusai/__init__.py
@@ -4,4 +4,4 @@
from .streaming_client import StreamingClient


__version__ = "1.1.5"
__version__ = "1.1.6"
26 changes: 25 additions & 1 deletion abacusai/api_class/batch_prediction.py
@@ -24,9 +24,11 @@ class AnomalyOutliersBatchPredictionArgs(BatchPredictionArgs):
Args:
for_eval (bool): If True, the test fold which was created during training and used for metrics calculation will be used as input data. These predictions are hence used for model evaluation.
threshold (float): The threshold for detecting an anomaly. Range: [0.8, 0.99]
explain_predictions (bool): If True, calculates explanations for the predicted values along with predictions.
"""
for_eval: bool = dataclasses.field(default=None)
threshold: float = dataclasses.field(default=None)
explain_predictions: bool = dataclasses.field(default=None)

def __post_init__(self):
self.problem_type = enums.ProblemType.ANOMALY_OUTLIERS
@@ -44,7 +46,8 @@ class ForecastingBatchPredictionArgs(BatchPredictionArgs):
start_date_offset (int): Sets prediction start date as this offset relative to the prediction start date.
forecasting_horizon (int): The number of timestamps to predict in the future. Range: [1, 1000].
item_attributes_to_include_in_the_result (list): List of columns to include in the prediction output.
explain_predictions (bool): If True, explain predictions for the forecast.
explain_predictions (bool): If True, calculates explanations for the forecasted values along with predictions.
automate_monitoring (bool): If True, creates a monitor to calculate the drift for the batch prediction.
"""
for_eval: bool = dataclasses.field(default=None)
predictions_start_date: str = dataclasses.field(default=None)
@@ -53,6 +56,7 @@ class ForecastingBatchPredictionArgs(BatchPredictionArgs):
forecasting_horizon: int = dataclasses.field(default=None)
item_attributes_to_include_in_the_result: list = dataclasses.field(default=None)
explain_predictions: bool = dataclasses.field(default=None)
automate_monitoring: bool = dataclasses.field(default=None)

def __post_init__(self):
self.problem_type = enums.ProblemType.FORECASTING
@@ -110,6 +114,8 @@ class PredictiveModelingBatchPredictionArgs(BatchPredictionArgs):
explanation_filter_upper_bound (float): If set explanations will be limited to predictions below this value, Range: [0, 1].
bound_label (str): For classification problems specifies the label to which the explanation bounds are applied.
output_columns (list): A list of column names to include in the prediction result.
explain_predictions (bool): If True, calculates explanations for the predicted values along with predictions.
automate_monitoring (bool): If True, creates a monitor to calculate the drift for the batch prediction.
"""
for_eval: bool = dataclasses.field(default=None)
explainer_type: enums.ExplainerType = dataclasses.field(default=None)
@@ -121,6 +127,8 @@ class PredictiveModelingBatchPredictionArgs(BatchPredictionArgs):
explanation_filter_upper_bound: float = dataclasses.field(default=None)
explanation_filter_label: str = dataclasses.field(default=None)
output_columns: list = dataclasses.field(default=None)
explain_predictions: bool = dataclasses.field(default=None)
automate_monitoring: bool = dataclasses.field(default=None)

def __post_init__(self):
self.problem_type = enums.ProblemType.PREDICTIVE_MODELING
@@ -194,6 +202,21 @@ def __post_init__(self):
self.problem_type = enums.ProblemType.CHAT_LLM


@dataclasses.dataclass
class TrainablePlugAndPlayBatchPredictionArgs(BatchPredictionArgs):
"""
Batch Prediction Config for the TrainablePlugAndPlay problem type
Args:
automate_monitoring (bool): If True, creates a monitor to calculate the drift for the batch prediction.
"""
for_eval: bool = dataclasses.field(default=None)
automate_monitoring: bool = dataclasses.field(default=None)

def __post_init__(self):
self.problem_type = enums.ProblemType.CUSTOM_ALGORITHM


@dataclasses.dataclass
class _BatchPredictionArgsFactory(_ApiClassFactory):
config_abstract_class = BatchPredictionArgs
@@ -208,4 +231,5 @@ class _BatchPredictionArgsFactory(_ApiClassFactory):
enums.ProblemType.SENTENCE_BOUNDARY_DETECTION: SentenceBoundaryDetectionBatchPredictionArgs,
enums.ProblemType.THEME_ANALYSIS: ThemeAnalysisBatchPredictionArgs,
enums.ProblemType.CHAT_LLM: ChatLLMBatchPredictionArgs,
enums.ProblemType.CUSTOM_ALGORITHM: TrainablePlugAndPlayBatchPredictionArgs,
}
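
A minimal usage sketch for the batch prediction fields added above (illustrative only, not part of this commit; argument values are hypothetical):

from abacusai.api_class.batch_prediction import (
    ForecastingBatchPredictionArgs,
    TrainablePlugAndPlayBatchPredictionArgs,
)

# Forecasting config using the newly added explain_predictions and
# automate_monitoring flags.
forecast_args = ForecastingBatchPredictionArgs(
    for_eval=False,
    forecasting_horizon=30,
    explain_predictions=True,    # calculate explanations for the forecasted values
    automate_monitoring=True,    # create a drift monitor for this batch prediction
)

# The new TrainablePlugAndPlay config is registered under
# ProblemType.CUSTOM_ALGORITHM in _BatchPredictionArgsFactory.
pnp_args = TrainablePlugAndPlayBatchPredictionArgs(
    for_eval=True,
    automate_monitoring=True,
)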
2 changes: 2 additions & 0 deletions abacusai/api_class/document_retriever.py
@@ -14,11 +14,13 @@ class VectorStoreConfig(ApiClass):
chunk_overlap_fraction (float): The fraction of overlap between chunks.
text_encoder (VectorStoreTextEncoder): Encoder used to index texts from the documents.
chunk_size_factors (list): Chunking data with multiple sizes. The specified list of factors are used to calculate more sizes, in addition to `chunk_size`.
score_multiplier_column (str): If provided, will use the values in this metadata column to modify the relevance score of returned chunks for all queries.
"""
chunk_size: int = dataclasses.field(default=None)
chunk_overlap_fraction: float = dataclasses.field(default=None)
text_encoder: VectorStoreTextEncoder = dataclasses.field(default=None)
chunk_size_factors: list = dataclasses.field(default=None)
score_multiplier_column: str = dataclasses.field(default=None)


@dataclasses.dataclass
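A short sketch of the new score_multiplier_column option (illustrative only; the column name is a placeholder):

from abacusai.api_class.document_retriever import VectorStoreConfig

# Values from the "priority" metadata column scale the relevance score of
# returned chunks for every query against this vector store.
config = VectorStoreConfig(
    chunk_size=512,
    chunk_overlap_fraction=0.1,
    chunk_size_factors=[0.5, 2],
    score_multiplier_column="priority",   # hypothetical metadata column
)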
20 changes: 20 additions & 0 deletions abacusai/api_class/enums.py
@@ -350,6 +350,7 @@ class ApplicationConnectorType(ApiEnum):
SHAREPOINT = 'SHAREPOINT'
TEAMS = 'TEAMS'
ABACUSUSAGEMETRICS = 'ABACUSUSAGEMETRICS'
MICROSOFTAUTH = 'MICROSOFTAUTH'


class PythonFunctionArgumentType(ApiEnum):
@@ -476,3 +477,22 @@ class OcrMode(ApiEnum):
COMPREHENSIVE = 'COMPREHENSIVE'
COMPREHENSIVE_V2 = 'COMPREHENSIVE_V2'
COMPREHENSIVE_TABLE_MD = 'COMPREHENSIVE_TABLE_MD'


class DataType(ApiEnum):
INTEGER = 'integer'
FLOAT = 'float'
STRING = 'string'
DATE = 'date'
DATETIME = 'datetime'
BOOLEAN = 'boolean'
LIST = 'list'
STRUCT = 'struct'
NULL = 'null'

@classmethod
def from_str(cls, value):
if not value:
return None
default_map = {val.value: val for val in DataType}
return default_map[value.lower()]
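
A brief sketch of the new DataType.from_str helper added above (illustrative only):

from abacusai.api_class.enums import DataType

# Maps a type name to its enum member; empty or None input returns None.
assert DataType.from_str("integer") is DataType.INTEGER
assert DataType.from_str("Boolean") is DataType.BOOLEAN   # lookup lowercases the value
assert DataType.from_str(None) is None

# An unrecognized name raises KeyError, since the lookup indexes the map directly.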
2 changes: 2 additions & 0 deletions abacusai/api_class/model.py
@@ -453,6 +453,7 @@ class ChatLLMTrainingConfig(TrainingConfig):
hide_generated_sql (bool): When running data queries, hides the generated SQL in the response and will just return the table.
disable_data_summarization (bool): After executing a query, summarize the response and reply back with only the table and query run.
search_score_cutoff (float): Minimum search score to consider a document as a valid search result.
database_connector_id (str): Database connector id to use for the chatllm.
"""
document_retrievers: List[str] = None
num_completion_tokens: int = None
@@ -468,6 +469,7 @@ class ChatLLMTrainingConfig(TrainingConfig):
hide_generated_sql: bool = None
disable_data_summarization: bool = None
search_score_cutoff: float = None
database_connector_id: str = None

def __post_init__(self):
self.problem_type = enums.ProblemType.CHAT_LLM
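A minimal sketch of the new database_connector_id field on ChatLLMTrainingConfig (illustrative only; the retriever name and connector id are placeholders):

from abacusai.api_class.model import ChatLLMTrainingConfig

config = ChatLLMTrainingConfig(
    document_retrievers=["my_document_retriever"],   # hypothetical retriever name
    database_connector_id="1234567890abcdef",        # hypothetical connector id
    search_score_cutoff=0.5,
    hide_generated_sql=False,
)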