-
Notifications
You must be signed in to change notification settings - Fork 209
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
smart dependency manager #511
base: master
Are you sure you want to change the base?
Changes from 6 commits
9208e27
1b1a727
eb4ac0c
ea08c7c
e0c7123
a41f762
d54ee2e
01abf59
2e58fa5
2960bda
e2fac00
70d3e9b
4d8c6c8
3c2fdcf
f168a4f
5144e3c
f7a8e01
3e3d44c
e99cbe5
c8523ba
c2b0397
813fde2
caecfba
4af3fad
22e4d18
68537c6
886d51a
a4ca316
f6fb4b9
ed0262b
0f9539d
d34fef2
65eca98
629b648
ff7590b
4d7b824
fc89beb
6778a16
df5fcae
8c48dcf
c1df5ba
0c86a7e
86f51b3
7230af2
45415e8
9e28265
5e9956b
f595dc5
5e25907
f47b6d7
511187f
e7ba215
d784537
8e6cd50
fddde77
9aed732
2ee37fc
0011a73
e08c16f
e6f29dd
9f95b7e
1304968
4dd7d0a
a12898b
a1db061
9199db0
f3c12e9
95be2db
74092fc
3210019
abb999e
5192f79
0d165dd
032193a
75207ce
1f539f1
219555b
8b53e6d
3380fa5
c12ed7e
ecdd72b
181abfa
703e923
5d7f750
849baae
6935a91
c1f94c2
eeaef0b
8d4c1df
37ea918
8e32e0c
1f5f243
20430e0
a3bb113
9528e4a
d170ace
6a508c4
f5812bd
976d1dd
2faf466
2c96419
ab73859
a379787
580ef32
2c35bb2
3d5aa45
260c3b7
23e4257
c6417f9
457ef7a
69e59e7
6977d67
bba6c00
533a750
20b1f16
dd23f25
3b59258
5e63074
6db86a3
42f6a75
aadc84b
edbdf37
0ec47bb
139f7f9
3223a27
c47df98
26cd5e9
e47fa35
d8f9e6d
1904df5
a9d3d9e
0dd4ed6
6007eb7
6d0cb1c
86378eb
5b36dd0
90ca97a
08de406
b236337
85e1e24
c86cb53
5d5146f
8640971
58d9810
d02d480
a39928c
cedd9ad
dcfdd9c
cc8c4d2
79045df
21bf0c9
9801824
7b86a04
0eea678
7441b29
52abe0f
f87139d
637a991
21d2748
f0db78b
9189800
0de2ffa
071faf1
62c58bc
ab49794
8cb2838
bb8a258
0309329
f37ce87
83f8fc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import importlib | ||
|
||
# Optional packages that are probed eagerly when a DepManager is created.
DEPS = ['cu_cat']


class DepManager:
    """Lazily import optional dependencies and report their availability.

    Accessing ``manager.<module_name>`` always yields a 4-tuple
    ``(ok, message, module, version)``:

    * ``ok`` -- ``True`` when the module could be imported.
    * ``message`` -- ``"ok"`` on success, ``"<name> not installed"`` otherwise.
    * ``module`` -- the imported module object, or ``None``.
    * ``version`` -- the module's ``__version__`` if it defines one, else ``None``.

    Successfully imported modules are cached in ``self.pkgs`` (name -> module).
    """

    def __init__(self) -> None:
        self.pkgs = {}
        self.deps()

    def __getattr__(self, pkg: str) -> tuple:
        """Resolve ``pkg`` as a module name and return its status tuple.

        Python only calls ``__getattr__`` when normal attribute lookup fails,
        so modules are deliberately never stored as instance attributes: that
        keeps every access (first or repeated) returning the same 4-tuple.

        :raises AttributeError: for ``pkgs`` and underscore-prefixed names, so
            protocol probes such as ``__deepcopy__`` are not treated as package
            names and a half-initialised instance cannot recurse forever.
        """
        if pkg == 'pkgs' or pkg.startswith('_'):
            raise AttributeError(pkg)
        self._add_deps(pkg)
        module = self.pkgs.get(pkg)
        if module is None:
            return False, str(pkg) + " not installed", None, None
        # Not every module defines __version__; report None rather than raise.
        return True, "ok", module, getattr(module, "__version__", None)

    def _add_deps(self, pkg: str) -> None:
        """Import ``pkg`` and cache it in ``self.pkgs`` if it is not cached yet.

        Import failures are swallowed on purpose (best-effort probing); the
        package is simply left out of ``self.pkgs`` and will be retried on the
        next access.
        """
        if pkg in self.pkgs:
            return
        try:
            self.pkgs[pkg] = importlib.import_module(pkg)
        except Exception:
            # Broad on purpose: a broken optional package may fail with errors
            # other than ImportError. KeyboardInterrupt and SystemExit are not
            # subclasses of Exception and still propagate.
            pass

    def deps(self) -> None:
        """Eagerly probe every package listed in the module-level ``DEPS``."""
        for dep in DEPS:
            self._add_deps(dep)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
from .PlotterBase import WeakValueDictionary, Plottable | ||
from .util import setup_logger, check_set_memoize | ||
from .ai_utils import infer_graph, infer_self_graph | ||
from .dep_manager import DepManager | ||
|
||
# add this inside classes and have a method that can set log level | ||
logger = setup_logger(name=__name__, verbose=config.VERBOSE) | ||
|
@@ -69,33 +70,11 @@ | |
|
||
|
||
#@check_set_memoize | ||
def lazy_import_has_dependancy_text(): | ||
import warnings | ||
warnings.filterwarnings("ignore") | ||
try: | ||
from sentence_transformers import SentenceTransformer | ||
return True, 'ok', SentenceTransformer | ||
except ModuleNotFoundError as e: | ||
return False, e, None | ||
|
||
def lazy_import_has_min_dependancy(): | ||
import warnings | ||
warnings.filterwarnings("ignore") | ||
try: | ||
import scipy.sparse # noqa | ||
from scipy import __version__ as scipy_version | ||
from dirty_cat import __version__ as dirty_cat_version | ||
from sklearn import __version__ as sklearn_version | ||
logger.debug(f"SCIPY VERSION: {scipy_version}") | ||
logger.debug(f"Dirty CAT VERSION: {dirty_cat_version}") | ||
logger.debug(f"sklearn VERSION: {sklearn_version}") | ||
return True, 'ok' | ||
except ModuleNotFoundError as e: | ||
return False, e | ||
|
||
deps = DepManager() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. same |
||
|
||
def assert_imported_text(): | ||
has_dependancy_text_, import_text_exn, _ = lazy_import_has_dependancy_text() | ||
has_dependancy_text_, import_text_exn, _, _ = deps.sentence_transformers | ||
|
||
if not has_dependancy_text_: | ||
logger.error( # noqa | ||
"AI Package sentence_transformers not found," | ||
|
@@ -105,7 +84,14 @@ def assert_imported_text(): | |
|
||
|
||
def assert_imported(): | ||
has_min_dependancy_, import_min_exn = lazy_import_has_min_dependancy() | ||
has_min_dependancy_,import_min_exn,_,scipy_version = deps.scipy | ||
has_min_dependancy_,import_min_exn,_,dirty_cat_version = deps.dirty_cat | ||
has_min_dependancy_,import_min_exn,_,sklearn_version = deps.sklearn | ||
if None not in [scipy_version, dirty_cat_version, sklearn_version]: | ||
logger.debug(f"SCIPY VERSION: {scipy_version}") | ||
logger.debug(f"Dirty CAT VERSION: {dirty_cat_version}") | ||
logger.debug(f"sklearn VERSION: {sklearn_version}") | ||
|
||
if not has_min_dependancy_: | ||
logger.error( # noqa | ||
"AI Packages not found, trying running" # noqa | ||
|
@@ -149,10 +135,10 @@ def resolve_feature_engine( | |
return feature_engine # type: ignore | ||
|
||
if feature_engine == "auto": | ||
has_dependancy_text_, _, _ = lazy_import_has_dependancy_text() | ||
has_dependancy_text_, _, _, _ = deps.sentence_transformers | ||
if has_dependancy_text_: | ||
return "torch" | ||
has_min_dependancy_, _ = lazy_import_has_min_dependancy() | ||
has_min_dependancy_, _, _, _ = deps.dirty_cat | ||
if has_min_dependancy_: | ||
return "dirty_cat" | ||
return "pandas" | ||
|
@@ -169,7 +155,7 @@ def resolve_feature_engine( | |
|
||
def resolve_y(df: Optional[pd.DataFrame], y: YSymbolic) -> pd.DataFrame: | ||
|
||
if isinstance(y, pd.DataFrame) or 'cudf' in str(getmodule(y)): | ||
if isinstance(y, pd.DataFrame) or 'cudf.core.dataframe' in str(getmodule(y)): | ||
return y # type: ignore | ||
|
||
if df is None: | ||
|
@@ -190,7 +176,7 @@ def resolve_y(df: Optional[pd.DataFrame], y: YSymbolic) -> pd.DataFrame: | |
|
||
def resolve_X(df: Optional[pd.DataFrame], X: XSymbolic) -> pd.DataFrame: | ||
|
||
if isinstance(X, pd.DataFrame) or 'cudf' in str(getmodule(X)): | ||
if isinstance(X, pd.DataFrame) or 'cudf.core.dataframe' in str(getmodule(X)): | ||
return X # type: ignore | ||
|
||
if df is None: | ||
|
@@ -292,14 +278,7 @@ def remove_internal_namespace_if_present(df: pd.DataFrame): | |
config.IMPLICIT_NODE_ID, | ||
"index", # in umap, we add as reindex | ||
] | ||
|
||
if (len(df.columns) <= 2): | ||
df = df.rename(columns={c: c + '_1' for c in df.columns if c in reserved_namespace}) | ||
# if (isinstance(df.columns.to_list()[0],int)): | ||
# int_namespace = pd.to_numeric(df.columns, errors = 'ignore').dropna().to_list() # type: ignore | ||
# df = df.rename(columns={c: str(c) + '_1' for c in df.columns if c in int_namespace}) | ||
else: | ||
df = df.drop(columns=reserved_namespace, errors="ignore") # type: ignore | ||
df = df.drop(columns=reserved_namespace, errors="ignore") # type: ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like the code ^^ is there to work around some error case, so why the change? |
||
return df | ||
|
||
|
||
|
@@ -703,7 +682,7 @@ def encode_textual( | |
max_df: float = 0.2, | ||
min_df: int = 3, | ||
) -> Tuple[pd.DataFrame, List, Any]: | ||
_, _, SentenceTransformer = lazy_import_has_dependancy_text() | ||
_, _, SentenceTransformer, _ = deps.sentence_transformers | ||
|
||
t = time() | ||
text_cols = get_textual_columns( | ||
|
@@ -1096,7 +1075,7 @@ def process_nodes_dataframes( | |
text_cols: List[str] = [] | ||
text_model: Any = None | ||
text_enc = pd.DataFrame([]) | ||
has_deps_text, import_text_exn, _ = lazy_import_has_dependancy_text() | ||
has_deps_text, import_text_exn, _, _ = deps.sentence_transformers | ||
if has_deps_text and (feature_engine in ["torch", "auto"]): | ||
text_enc, text_cols, text_model = encode_textual( | ||
df, | ||
|
@@ -1317,7 +1296,7 @@ def process_edge_dataframes( | |
|
||
:return: Encoded data matrix and target (if not None), the data encoders, and the label encoder. | ||
""" | ||
lazy_import_has_min_dependancy() | ||
deps.scipy | ||
from sklearn.preprocessing import ( | ||
MultiLabelBinarizer, | ||
) | ||
|
@@ -1467,7 +1446,7 @@ def transform_text( | |
text_cols: Union[List, str], | ||
) -> pd.DataFrame: | ||
from sklearn.pipeline import Pipeline | ||
_, _, SentenceTransformer = lazy_import_has_dependancy_text() | ||
_, _, SentenceTransformer, _ = deps.sentence_transformer() | ||
|
||
logger.debug("Transforming text using:") | ||
if isinstance(text_model, Pipeline): | ||
|
@@ -2005,8 +1984,7 @@ def _featurize_nodes( | |
logger.info("--- [[ RE-USING NODE FEATURIZATION ]]") | ||
fresh_res = copy.copy(res) | ||
for attr in ["_node_features", "_node_target", "_node_encoder"]: | ||
if hasattr(old_res, attr): | ||
setattr(fresh_res, attr, getattr(old_res, attr)) | ||
setattr(fresh_res, attr, getattr(old_res, attr)) | ||
|
||
return fresh_res | ||
|
||
|
@@ -2210,9 +2188,9 @@ def transform(self, df: pd.DataFrame, | |
""" | ||
|
||
# This is temporary until cucat release | ||
if 'cudf' in str(getmodule(df)): | ||
if 'cudf.core.dataframe' in str(getmodule(df)): | ||
df = df.to_pandas() # type: ignore | ||
if (y is not None) and ('cudf' in str(getmodule(y))): | ||
if (y is not None) and ('cudf.core.dataframe' in str(getmodule(y))): | ||
y = y.to_pandas() # type: ignore | ||
|
||
if kind == "nodes": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add return types?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added
pkg:str
here to return type; I don't think this is related to the pytest type-check
errors below and in #489, but I also don't know why this
arrow_uploader
error is coming up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any idea about this type check error, seemingly involving arrow_uploader? another PR pings this same error, not sure what to make of it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Already fixed in main