Merge branch 'doc-fixes' of https://github.com/graphistry/pygraphistry …

…into cleanup
graphistry · Feb 28, 2023 · d80f66f · d80f66f
2 parents a272ea3 + 0b4702a
commit d80f66f
Show file tree

Hide file tree

Showing 15 changed files with 410 additions and 230 deletions.
diff --git a/docs/source/graphistry.rst b/docs/source/graphistry.rst
@@ -1,42 +1,81 @@
-graphistry package
+Layout & Plugins
 ==================
 .. toctree::
    :maxdepth: 3
 
-   graphistry.compute
+
    graphistry.layout
    graphistry.plugins
    graphistry.plugins_types
 
 
-graphistry.plotter module
--------------------------
+Plotter Module
+==================
 
-.. automodule:: graphistry.plotter
+.. automodule:: graphistry.PlotterBase
     :members:
     :undoc-members:
     :show-inheritance:
 
-graphistry.pygraphistry module
-------------------------------
+PyGraphistry Module
+===================
 
 .. automodule:: graphistry.pygraphistry
     :members:
     :undoc-members:
     :show-inheritance:
 
-graphistry.arrow_uploader module
---------------------------------
+Featurize 
+==================
+.. automodule:: graphistry.feature_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+UMAP 
+==================
+.. automodule:: graphistry.umap_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Semantic Search 
+==================
+.. automodule:: graphistry.text_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+DBScan
+==================
+.. automodule:: graphistry.compute.cluster
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Arrow Uploader Module
+=====================
 
 .. automodule:: graphistry.arrow_uploader
     :members:
     :undoc-members:
     :show-inheritance:
 
-graphistry.ArrowFileUploader module
------------------------------------
+Arrow File Uploader Module
+==========================
 
 .. automodule:: graphistry.ArrowFileUploader
     :members:
     :undoc-members:
     :show-inheritance:
+
+Versioneer
+==================
+
+.. automodule:: graphistry._version
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -1,8 +1,11 @@
-PyGraphistry's documentation (|version|)
+PyGraphistry[ai]'s documentation 
 ========================================
 
-Quickstart:
-`Read our tutorial <https://github.com/graphistry/pygraphistry/blob/master/README.md>`_
+.. Quickstart:
+.. `Read our tutorial <https://github.com/graphistry/pygraphistry/blob/master/README.md>`_
+
+PyGraphistry is a Python visual graph AI library to extract, transform, analyze, model, and visualize big graphs, especially alongside Graphistry end-to-end GPU server sessions. Installing the optional graphistry[ai] dependencies adds graph autoML, including automatic feature engineering, UMAP, and graph neural net support. Combined, PyGraphistry reduces your time to graph, taking you from raw data to visualizations and AI models in as few as three lines of code.
+Here in our docstrings you can find useful packages, modules, and commands to maximize your graph AI experience with PyGraphistry. In the navbar you can find an overview of all the packages and modules we provide, along with a few useful highlighted ones. You can search for them on our Search page. For a full tutorial, refer to our `PyGraphistry <https://github.com/graphistry/pygraphistry/>`_ repo.
 
 .. toctree::
    :maxdepth: 3

diff --git a/docs/source/modules.rst b/docs/source/modules.rst
@@ -1,9 +1,9 @@
-doc
-===
+.. doc
+.. ===
 
-.. toctree::
-   :maxdepth: 4
-   :caption: Contents:
+.. .. toctree::
+..    :maxdepth: 4
+..    :caption: Contents:
 
-   versioneer
+..    versioneer
 
diff --git a/docs/source/versioneer.rst b/docs/source/versioneer.rst
@@ -1,2 +1,2 @@
-versioneer module
-=================
+.. versioneer module
+.. =================
diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py
@@ -300,7 +300,7 @@ def style(self, fg=None, bg=None, page=None, logo=None):
         :param fg: Dictionary {'blendMode': str} of any valid CSS blend mode
         :type fg: dict
 
-        :param bg: Nested dictionary of page background properties. {'color': str, 'gradient': {'kind': str, 'position': str, 'stops': list }, 'image': { 'url': str, 'width': int, 'height': int, 'blendMode': str }
+        :param bg: Nested dictionary of page background properties. { 'color': str, 'gradient': {'kind': str, 'position': str, 'stops': list }, 'image': { 'url': str, 'width': int, 'height': int, 'blendMode': str } }
         :type bg: dict
 
         :param logo: Nested dictionary of logo properties. { 'url': str, 'autoInvert': bool, 'position': str, 'dimensions': { 'maxWidth': int, 'maxHeight': int }, 'crop': { 'top': int, 'left': int, 'bottom': int, 'right': int }, 'padding': { 'top': int, 'left': int, 'bottom': int, 'right': int}, 'style': str}        
@@ -314,15 +314,18 @@ def style(self, fg=None, bg=None, page=None, logo=None):
 
         **Example: Chained merge - results in url and blendMode being set, while color is dropped**
             ::
+
                 g2 =  g.style(bg={'color': 'black'}, fg={'blendMode': 'screen'})
                 g3 = g2.style(bg={'image': {'url': 'http://site.com/watermark.png'}})
                 
         **Example: Gradient background**
             ::
+
               g.style(bg={'gradient': {'kind': 'linear', 'position': 45, 'stops': [['rgb(0,0,0)', '0%'], ['rgb(255,255,255)', '100%']]}})
               
         **Example: Page settings**
             ::
+            
               g.style(page={'title': 'Site - {{ name }}', 'favicon': 'http://site.com/logo.ico'})
 
         """        
@@ -850,13 +853,14 @@ def bind(self, source=None, destination=None, node=None, edge=None,
         :param edge: Attribute containing an edge's ID
         :type edge: str
 
-        :param edge_title: Attribute overriding edge's minimized label text. By default, the edge source and destination is used.
+        :param edge_title: Attribute overriding edge's minimized label text.
+            By default, the edge source and destination are used.
         :type edge_title: str
 
         :param edge_label: Attribute overriding edge's expanded label text. By default, scrollable list of attribute/value mappings.
         :type edge_label: str
 
-        :param edge_color: Attribute overriding edge's color. rgba (int64) or int32 palette index, see palette definitions <https://graphistry.github.io/docs/legacy/api/0.9.2/api.html#extendedpalette>`_ for values. Based on Color Brewer.
+        :param edge_color: Attribute overriding edge's color. rgba (int64) or int32 palette index, see `palette <https://graphistry.github.io/docs/legacy/api/0.9.2/api.html#extendedpalette>`_ definitions for values. Based on Color Brewer.
         :type edge_color: str
 
         :param edge_source_color: Attribute overriding edge's source color if no edge_color, as an rgba int64 value.
@@ -874,7 +878,7 @@ def bind(self, source=None, destination=None, node=None, edge=None,
         :param point_label: Attribute overriding node's expanded label text. By default, scrollable list of attribute/value mappings.
         :type point_label: str
 
-        :param point_color: Attribute overriding node's color.rgba (int64) or int32 palette index, see palette definitions <https://graphistry.github.io/docs/legacy/api/0.9.2/api.html#extendedpalette>`_ for values. Based on Color Brewer.
+        :param point_color: Attribute overriding node's color. rgba (int64) or int32 palette index, see `palette <https://graphistry.github.io/docs/legacy/api/0.9.2/api.html#extendedpalette>`_ definitions for values. Based on Color Brewer.
         :type point_color: str
 
         :param point_size: Attribute overriding node's size. By default, uses the node degree. The visualization will normalize point sizes and adjust dynamically using semantic zoom.
@@ -1007,6 +1011,7 @@ def nodes(self, nodes: Union[Callable, Any], node=None, *args, **kwargs) -> Plot
 
         **Example**
             ::
+
                 import graphistry
 
                 def sample_nodes(g, n):
@@ -1106,6 +1111,7 @@ def edges(self, edges: Union[Callable, Any], source=None, destination=None, edge
 
         **Example**
             ::
+
                 import graphistry
 
                 def sample_edges(g, n):

diff --git a/graphistry/compute/cluster.py b/graphistry/compute/cluster.py
@@ -71,11 +71,11 @@ def get_model_matrix(g, kind: str, cols: Optional[Union[List, str]], umap, targe
         Allows for a single function to get the model matrix for both nodes and edges as well as targets, embeddings, and features
 
     Args:
-        g: graphistry graph
-        kind: 'nodes' or 'edges'
-        cols: list of columns to use for clustering given `g.featurize` has been run
-        umap: whether to use UMAP embeddings or features dataframe
-        target: whether to use the target dataframe or features dataframe
+        :g: graphistry graph
+        :kind: 'nodes' or 'edges'
+        :cols: list of columns to use for clustering given `g.featurize` has been run
+        :umap: whether to use UMAP embeddings or features dataframe
+        :target: whether to use the target dataframe or features dataframe
 
     Returns:
         pd.DataFrame: dataframe of model matrix given the inputs
@@ -99,11 +99,11 @@ def dbscan_fit(g: Any, dbscan: Any, kind: str = "nodes", cols: Optional[Union[Li
     Fits clustering on UMAP embeddings if umap is True, otherwise on the features dataframe
         or target dataframe if target is True.
 
-    args:
-        g: graphistry graph
-        kind: 'nodes' or 'edges'
-        cols: list of columns to use for clustering given `g.featurize` has been run
-        use_umap_embedding: whether to use UMAP embeddings or features dataframe for clustering (default: True)
+    Args:
+        :g: graphistry graph
+        :kind: 'nodes' or 'edges'
+        :cols: list of columns to use for clustering given `g.featurize` has been run
+        :use_umap_embedding: whether to use UMAP embeddings or features dataframe for clustering (default: True)
     """
     X = get_model_matrix(g, kind, cols, use_umap_embedding, target)
 
@@ -212,6 +212,8 @@ def dbscan(
         """DBSCAN clustering on cpu or gpu infered automatically. Adds a `_dbscan` column to nodes or edges.
 
         Examples:
+        ::
+
             g = graphistry.edges(edf, 'src', 'dst').nodes(ndf, 'node')
 
             # cluster by UMAP embeddings
@@ -244,14 +246,14 @@ def dbscan(
              https://github.com/graphistry/pygraphistry/blob/master/demos/ai/cyber/cyber-redteam-umap-demo.ipynb
 
         Args:
-            min_dist float: The maximum distance between two samples for them to be considered as in the same neighborhood.
-            kind str: 'nodes' or 'edges'
-            cols: list of columns to use for clustering given `g.featurize` has been run, nice way to slice features or targets by
+            :min_dist float: The maximum distance between two samples for them to be considered as in the same neighborhood.
+            :kind str: 'nodes' or 'edges'
+            :cols: list of columns to use for clustering given `g.featurize` has been run, nice way to slice features or targets by
                 fragments of interest, e.g. ['ip_172', 'location', 'ssh', 'warnings']
-            fit_umap_embedding bool: whether to use UMAP embeddings or features dataframe to cluster DBSCAN
-            min_samples: The number of samples in a neighborhood for a point to be considered as a core point.
+            :fit_umap_embedding bool: whether to use UMAP embeddings or features dataframe to cluster DBSCAN
+            :min_samples: The number of samples in a neighborhood for a point to be considered as a core point.
                 This includes the point itself.
-            target: whether to use the target column as the clustering feature
+            :target: whether to use the target column as the clustering feature
 
         """
 
@@ -333,43 +335,51 @@ def transform_dbscan(
         Graph nodes | edges will be colored by '_dbscan' column.
             
             Examples:
+            ::
+
                 fit:
                     g = graphistry.edges(edf, 'src', 'dst').nodes(ndf, 'node')
                     g2 = g.featurize().dbscan()
 
                 predict:
+                ::
+
                     emb, X, _, ndf = g2.transform_dbscan(ndf, return_graph=False)
                     # or
                     g3 = g2.transform_dbscan(ndf, return_graph=True)
                     g3.plot()
 
             likewise for umap:
+            ::
+
                 fit:
                     g = graphistry.edges(edf, 'src', 'dst').nodes(ndf, 'node')
                     g2 = g.umap(X=.., y=..).dbscan()
 
                 predict:
+                ::
+
                     emb, X, y, ndf = g2.transform_dbscan(ndf, ndf, return_graph=False)
                     # or
                     g3 = g2.transform_dbscan(ndf, ndf, return_graph=True)
                     g3.plot()
 
 
-        args:
-            df: dataframe to transform
-            y: optional labels dataframe
-            min_dist: The maximum distance between two samples for them to be considered as in the same neighborhood.
+        Args:
+            :df: dataframe to transform
+            :y: optional labels dataframe
+            :min_dist: The maximum distance between two samples for them to be considered as in the same neighborhood.
                 smaller values will result in less edges between the minibatch and the original graph.
                 Default 'auto', infers min_dist from the mean distance and std of new points to the original graph
-            fit_umap_embedding: whether to use UMAP embeddings or features dataframe when inferring edges between
+            :fit_umap_embedding: whether to use UMAP embeddings or features dataframe when inferring edges between
                 the minibatch and the original graph. Default False, uses the features dataframe
-            sample: number of samples to use when inferring edges between the minibatch and the original graph,
+            :sample: number of samples to use when inferring edges between the minibatch and the original graph,
                 if None, will only use closest point to the minibatch. If greater than 0, will sample the closest `sample` points
                 in existing graph to pull in more edges. Default None
-            kind: 'nodes' or 'edges'
-            return_graph: whether to return a graph or the (emb, X, y, minibatch df enriched with DBSCAN labels), default True
+            :kind: 'nodes' or 'edges'
+            :return_graph: whether to return a graph or the (emb, X, y, minibatch df enriched with DBSCAN labels), default True
                 infered graph supports kind='nodes' only. 
-            verbose: whether to print out progress, default False
+            :verbose: whether to print out progress, default False
 
         """
         emb, X, y, df = self._transform_dbscan(df, y, kind=kind, verbose=verbose)