From 41ed2978236cd9e545c814c2fbd647e27f96eb91 Mon Sep 17 00:00:00 2001 From: LouisK92 <37270609+LouisK92@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:13:54 +0100 Subject: [PATCH 1/3] Add unfinished version of chellcharter purity metric --- .../cellcharter_purity/cellchart_purity.json | 4 + .../cellcharter_purity/cellcharter_purity.py | 224 ++++++++++++++++++ .../cellcharter_purity/cellcharter_purity.yml | 8 + 3 files changed, 236 insertions(+) create mode 100644 metric/cellcharter_purity/cellchart_purity.json create mode 100755 metric/cellcharter_purity/cellcharter_purity.py create mode 100644 metric/cellcharter_purity/cellcharter_purity.yml diff --git a/metric/cellcharter_purity/cellchart_purity.json b/metric/cellcharter_purity/cellchart_purity.json new file mode 100644 index 00000000..6dc46f5d --- /dev/null +++ b/metric/cellcharter_purity/cellchart_purity.json @@ -0,0 +1,4 @@ +{ + "technology": "MERFISH", + "sample_file": null +} \ No newline at end of file diff --git a/metric/cellcharter_purity/cellcharter_purity.py b/metric/cellcharter_purity/cellcharter_purity.py new file mode 100755 index 00000000..873b91b8 --- /dev/null +++ b/metric/cellcharter_purity/cellcharter_purity.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python + +# Author_and_contribution: Niklas Mueller-Boetticher; created template +# Author_and_contribution: Louis B. Kuemmerle; wrapped cellcharter's purity metric +# Author_and_contribution: Marco Varrone; gave input on setting hyperparameters and heuristics that generalise well and helped with debugging + +# At the time of development (12th Dec 2023) cellcharter doesn't have release tags. The code used for development can +# be accessed via (commit 2b15549968559d9023d1fc92f9798a8c38dffcac): +# +# git clone https://github.com/CSOgroup/cellcharter.git +# cd cellcharter +# git checkout -b [new-branch-name] 2b15549968559d9023d1fc92f9798a8c38dffcac + +import argparse +import json +import numpy as np +import pandas as pd +import anndata as ad +import squidpy as sq +import cellcharter as cc + +# TODO: template arguments are strict. Need to readjust here. +parser = argparse.ArgumentParser(description="Calculate cellcharter's purity metric that measures if cells assigned to other connected components are located within the boundaries of given components.") +parser.add_argument("--coordinates", help="Path to coordinates (as tsv).", required=True) +parser.add_argument("-l", "--labels", help="Labels from domain clustering.", required=True) +parser.add_argument( + "-c", + "--config", + help="Config file (json) used to pass additional parameters.", + required=True, +) +parser.add_argument("-o", "--out_file", help="Output file.", required=True) + +SUPPORTED_TECHNOLOGIES = ["CODEX", "CosMx", "MERFISH", "IMC", "Visium", "RNA + ATAC"] +# for each technology we need to specify how to compute the graph connectivity and how to restrict connected components + +from anndata import AnnData +from scipy.sparse import csr_matrix +from squidpy._constants._constants import CoordType, Transform +from squidpy._constants._pkg_constants import Key +from squidpy.gr._utils import _assert_connectivity_key + +def remove_long_links( + adata: AnnData, + distance_percentile: float | None = 99.0, + mean_distance_factor: float | None = 4.0, + connectivity_key: str | None = None, + distances_key: str | None = None, + neighs_key: str | None = None, + copy: bool = False, +) -> tuple[csr_matrix, csr_matrix] | None: + """ + Remove links between cells at a distance bigger than a certain percentile or mean distance of all positive distances. + + It is designed for data with generic coordinates. + + Note: This function is copied from cellcharter.graph.remove_long_links and extended to allow for a mean distance + factor. + + Parameters + ---------- + %(adata)s + + distance_percentile + Percentile of the distances between cells over which links are trimmed after the network is built. + mean_distance_factor + Factor to multiply the mean distance between cells by to get the threshold for trimming links. If + ``distance_percentile`` is not ``None``, this parameter is ignored. + %(conn_key)s + + distances_key + Key in :attr:`anndata.AnnData.obsp` where spatial distances are stored. + Default is: :attr:`anndata.AnnData.obsp` ``['{{Key.obsp.spatial_dist()}}']``. + neighs_key + Key in :attr:`anndata.AnnData.uns` where the parameters from gr.spatial_neighbors are stored. + Default is: :attr:`anndata.AnnData.uns` ``['{{Key.uns.spatial_neighs()}}']``. + + %(copy)s + + Returns + ------- + If ``copy = True``, returns a :class:`tuple` with the new spatial connectivities and distances matrices. + + Otherwise, modifies the ``adata`` with the following keys: + - :attr:`anndata.AnnData.obsp` ``['{{connectivity_key}}']`` - the new spatial connectivities. + - :attr:`anndata.AnnData.obsp` ``['{{distances_key}}']`` - the new spatial distances. + - :attr:`anndata.AnnData.uns` ``['{{neighs_key}}']`` - :class:`dict` containing parameters. + """ + connectivity_key = Key.obsp.spatial_conn(connectivity_key) + distances_key = Key.obsp.spatial_dist(distances_key) + neighs_key = Key.uns.spatial_neighs(neighs_key) + _assert_connectivity_key(adata, connectivity_key) + _assert_connectivity_key(adata, distances_key) + + conns, dists = adata.obsp[connectivity_key], adata.obsp[distances_key] + + if copy: + conns, dists = conns.copy(), dists.copy() + + if distance_percentile is not None: + threshold = np.percentile(np.array(dists[dists != 0]).squeeze(), distance_percentile) + else: + threshold = np.mean(np.array(dists[dists != 0]).squeeze()) * mean_distance_factor + conns[dists > threshold] = 0 + dists[dists > threshold] = 0 + + conns.eliminate_zeros() + dists.eliminate_zeros() + + if copy: + return conns, dists + else: + adata.uns[neighs_key]["params"]["radius"] = threshold + +def get_alpha_start(adata, factor = 0.05): + """ Compute alpha start for boundary computation. + + Alpha start is computed based on a heuristic. We take the minimum length of the diagonals of the bounding boxes + over connected components and multiply it with a factor. + + The alpha start shouldn't be too large, otherwise alpha shapes get too close to the convex hull. Alpha start should + also not be too small to reduce computation time when searching for the optimal alpha. + + """ + distances = [] + for component in adata.obs["component"].dropna().unique(): + coords = adata[adata.obs["component"]==component].obsm["spatial"] + x1 = np.array([np.min(coords[:,0]), np.min(coords[:,1])]) + x2 = np.array([np.max(coords[:,0]), np.max(coords[:,1])]) + distances.append(np.linalg.norm(x1-x2)) + + return np.min(distances) * factor + +def script(): + + args = parser.parse_args() + + # Set paths and variables + coord_file = args.coordinates + label_file = args.labels + + if args.config is not None: + config_file = args.config + with open(config_file) as f: + config = json.load(f) + sample_file = config["sample_file"] if "sample_file" in config else None + technology = config["technology"] if "technology" in config else None + if technology not in SUPPORTED_TECHNOLOGIES: + raise ValueError(f"Technology {technology} not supported. Supported technologies are {SUPPORTED_TECHNOLOGIES}.") + + # Init adata + obs = pd.DataFrame( + data={ + "spatial_cluster":pd.read_table(label_file,index_col=0).iloc[:,0].values, + "sample": "sample1" if sample_file is None else pd.read_table(sample_file,index_col=0).iloc[:,0].values, + } + ) + obs.index = obs.index.astype(str) + obsm = {"spatial": pd.read_table(coord_file,index_col=0).values[:,:2]} + adata = ad.AnnData(X=None, obs=obs, obsm=obsm) + + # Compute graph connectivity + """ + From cellcharter paper: + Spatial network construction + Depending on the technology used, we employed different approaches implemented in the + Squidpy library (v.1.3.0) to construct the network. For the Visium and RNA + ATAC data, + which exhibit a regular structure, we assigned the six and four closest surrounding spots, + respectively, as neighbors for each spot. For the CODEX, CosMx, MERFISH and IMC data, we + constructed the network using Delaunay triangulation. However, Delaunay triangulation + can result in long edges between cells, especially at the slide borders. Therefore, we + eliminated edges between nodes if the distance between them exceeded the 99th percentile + of all distances between connected nodes. + """ + + if technology in ["CODEX", "CosMx", "MERFISH", "IMC"]: + adata_domains = adata[adata.obs["spatial_cluster"].notna()].copy() + #sq.gr.spatial_neighbors(adata_domains, delaunay=True, coord_type='generic') + #remove_long_links(adata_domains, distance_percentile=None, mean_distance_factor=2.0) + sq.gr.spatial_neighbors(adata_domains, delaunay=False, n_neighs=20, coord_type='generic') + #sq.gr.spatial_neighbors(adata, delaunay=True, coord_type='generic') + #cc.gr.remove_long_links(adata, distance_percentile = 99.0) + elif technology in ["Visium"]: + sq.gr.spatial_neighbors(adata, n_neighs=6, coord_type="grid") + elif technology in ["RNA + ATAC"]: + sq.gr.spatial_neighbors(adata, n_neighs=4, coord_type="grid") + else: + raise NotImplementedError(f"Graph connectivity calculation for the given technology {technology} is not implemented.") + + # Compute connected components + if technology in ["CODEX", "CosMx", "MERFISH", "IMC"]: + min_cells = 100 + cc.gr.connected_components(adata_domains, cluster_key='spatial_cluster',min_cells = min_cells) + adata.obs["component"] = adata_domains.obs["component"] + elif technology in ["Visium", "RNA + ATAC"]: + min_cells = 3 + cc.gr.connected_components(adata, cluster_key='spatial_cluster',min_cells = min_cells) + else: + raise NotImplementedError(f"Min Nr of cells for connected component identification for the given technology {technology} is not implemented.") + + adata.write(args.out_file.replace(".tsv",".h5ad")) #TODO: DELETE + + # Compute boundaries of domain clusters (saved in adata.uns[f"shape_component"]["boundary"]} + a_start = get_alpha_start(adata) + cc.tl.boundaries(adata, cluster_key = "component", min_hole_area_ratio = 0.1, alpha_start = a_start, copy = False) + print("N BOUNDARIES", len(adata.uns[f"shape_component"]["boundary"])) + + # Compute purities for each connected component + purities = cc.tl.purity(adata, cluster_key="component", library_key="sample", exterior=False, copy=True) + + # Compute purity metric (mean over purities of all connected components) + metric: float = np.mean([p for _,p in purities.items()]) + + ## Write output + from pathlib import Path + + Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) + + with open(args.out_file, "w") as file: + file.write(f"{metric:.5e}\n") + +# NOTE: __name__ == "__main__" is required, otherwise the multiprocessing might fail +if __name__ == "__main__": + script() \ No newline at end of file diff --git a/metric/cellcharter_purity/cellcharter_purity.yml b/metric/cellcharter_purity/cellcharter_purity.yml new file mode 100644 index 00000000..e64b3a3f --- /dev/null +++ b/metric/cellcharter_purity/cellcharter_purity.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge +dependencies: + - python=3.10.13 + - pip + - pip: + - cellcharter + # cellcharter doesn't have release tags yet \ No newline at end of file From fbb784c13eaebf2eba4d9e281de6b35ce3f179b6 Mon Sep 17 00:00:00 2001 From: LouisK92 <37270609+LouisK92@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:57:13 +0100 Subject: [PATCH 2/3] Add debug functionalities and rename json file --- ...rt_purity.json => cellcharter_purity.json} | 0 .../cellcharter_purity/cellcharter_purity.py | 79 +++++++++++++++++-- 2 files changed, 73 insertions(+), 6 deletions(-) rename metric/cellcharter_purity/{cellchart_purity.json => cellcharter_purity.json} (100%) diff --git a/metric/cellcharter_purity/cellchart_purity.json b/metric/cellcharter_purity/cellcharter_purity.json similarity index 100% rename from metric/cellcharter_purity/cellchart_purity.json rename to metric/cellcharter_purity/cellcharter_purity.json diff --git a/metric/cellcharter_purity/cellcharter_purity.py b/metric/cellcharter_purity/cellcharter_purity.py index 873b91b8..f80c84fe 100755 --- a/metric/cellcharter_purity/cellcharter_purity.py +++ b/metric/cellcharter_purity/cellcharter_purity.py @@ -30,10 +30,14 @@ required=True, ) parser.add_argument("-o", "--out_file", help="Output file.", required=True) +parser.add_argument("--fig_path", help="File path for plot to check if components look reasonable.", required=False, default=False) #TODO: Delete SUPPORTED_TECHNOLOGIES = ["CODEX", "CosMx", "MERFISH", "IMC", "Visium", "RNA + ATAC"] # for each technology we need to specify how to compute the graph connectivity and how to restrict connected components + +###### helper functions ###### + from anndata import AnnData from scipy.sparse import csr_matrix from squidpy._constants._constants import CoordType, Transform @@ -131,6 +135,58 @@ def get_alpha_start(adata, factor = 0.05): return np.min(distances) * factor + +###### functions for debug plots ###### +# It's important that reasonable connected components are identified. This needs to be checked on different +# technologies, tissues, and spatial domains. Potentially the adjacency graph parameters need to be adjusted for better +# generalisation. + +def get_n_colors(n): + from random import shuffle + + # Generate a slightly larger list of colors to account for the removal of black and white + cmap = plt.cm.get_cmap('nipy_spectral', n + 2) # Adjusting for potential removal + + # Generate colors and convert to hex + colors = cmap(np.linspace(0, 1, n + 2)) + hex_colors = ['#%02x%02x%02x' % (int(r*255), int(g*255), int(b*255)) for r, g, b, _ in colors] + + # Remove black and white from the list + hex_colors = [color for color in hex_colors if color not in ['#000000', '#ffffff']] + + # Ensure the list has exactly n colors by taking the first n colors + shuffle(hex_colors) + return hex_colors[:n] + +import matplotlib.pyplot as plt + +def plot_components(adata, save=False): + """ Plot components and their boundaries. + + """ + n = len(adata.obs["spatial_cluster"].dropna().unique()) + colors = get_n_colors(n) + component_to_spatial_cluster = dict(zip(adata.obs["component"], adata.obs["spatial_cluster"])) + + # color_code strings + df = adata.obs + color_code = df.loc[~df['spatial_cluster'].isnull(),'spatial_cluster'].unique() + label_to_color = dict(zip(color_code,colors)) + df['color'] = 'lightgray' + df.loc[~df['spatial_cluster'].isnull(),'color'] = df.loc[~df['spatial_cluster'].isnull(),'spatial_cluster'].map(label_to_color) + + fig = plt.figure(figsize=(10,10)) + for i, polygon1 in adata.uns[f"shape_component"]["boundary"].items(): + plt.plot(*polygon1.exterior.xy, color=label_to_color[component_to_spatial_cluster[i]]) + + plt.scatter(adata.obsm["spatial"][:,0],adata.obsm["spatial"][:,1],c=adata.obs['color'], s=1) + + if save: + fig.savefig(save) + + +###### main function ###### + def script(): args = parser.parse_args() @@ -138,6 +194,7 @@ def script(): # Set paths and variables coord_file = args.coordinates label_file = args.labels + fig_path = args.fig_path #TODO: Delete? if args.config is not None: config_file = args.config @@ -172,14 +229,20 @@ def script(): eliminated edges between nodes if the distance between them exceeded the 99th percentile of all distances between connected nodes. """ - if technology in ["CODEX", "CosMx", "MERFISH", "IMC"]: adata_domains = adata[adata.obs["spatial_cluster"].notna()].copy() + + # Initial version from cellcharter + #sq.gr.spatial_neighbors(adata, delaunay=True, coord_type='generic') + #cc.gr.remove_long_links(adata, distance_percentile = 99.0) + + # Alternative cellcharter like version that should be more robust than percentiles #sq.gr.spatial_neighbors(adata_domains, delaunay=True, coord_type='generic') #remove_long_links(adata_domains, distance_percentile=None, mean_distance_factor=2.0) + + # Simple n neighbors version (danger: long range connections are not removed) sq.gr.spatial_neighbors(adata_domains, delaunay=False, n_neighs=20, coord_type='generic') - #sq.gr.spatial_neighbors(adata, delaunay=True, coord_type='generic') - #cc.gr.remove_long_links(adata, distance_percentile = 99.0) + elif technology in ["Visium"]: sq.gr.spatial_neighbors(adata, n_neighs=6, coord_type="grid") elif technology in ["RNA + ATAC"]: @@ -197,13 +260,10 @@ def script(): cc.gr.connected_components(adata, cluster_key='spatial_cluster',min_cells = min_cells) else: raise NotImplementedError(f"Min Nr of cells for connected component identification for the given technology {technology} is not implemented.") - - adata.write(args.out_file.replace(".tsv",".h5ad")) #TODO: DELETE # Compute boundaries of domain clusters (saved in adata.uns[f"shape_component"]["boundary"]} a_start = get_alpha_start(adata) cc.tl.boundaries(adata, cluster_key = "component", min_hole_area_ratio = 0.1, alpha_start = a_start, copy = False) - print("N BOUNDARIES", len(adata.uns[f"shape_component"]["boundary"])) # Compute purities for each connected component purities = cc.tl.purity(adata, cluster_key="component", library_key="sample", exterior=False, copy=True) @@ -211,6 +271,13 @@ def script(): # Compute purity metric (mean over purities of all connected components) metric: float = np.mean([p for _,p in purities.items()]) + # Write files for debug purposes + if fig_path: + plot_components(adata, save=fig_path) + # If adata is saved for debug purposes writing fails when the polygons are still in adata + # del adata.uns[f"shape_component"] + # adata.write(args.out_file.replace(".tsv",".h5ad")) #TODO: DELETE + ## Write output from pathlib import Path From 52ec423cdd85d480a38b5bcb725a41c2a9b884dd Mon Sep 17 00:00:00 2001 From: LouisK92 Date: Thu, 14 Dec 2023 15:52:15 +0000 Subject: [PATCH 3/3] Get script ready for tests to showcase robustness --- .../cellcharter_purity/cellcharter_purity.py | 57 +++++++++++++------ 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/metric/cellcharter_purity/cellcharter_purity.py b/metric/cellcharter_purity/cellcharter_purity.py index f80c84fe..47aaac3f 100755 --- a/metric/cellcharter_purity/cellcharter_purity.py +++ b/metric/cellcharter_purity/cellcharter_purity.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Author_and_contribution: Niklas Mueller-Boetticher; created template -# Author_and_contribution: Louis B. Kuemmerle; wrapped cellcharter's purity metric +# Author_and_contribution: Louis B. Kuemmerle; wrote script # Author_and_contribution: Marco Varrone; gave input on setting hyperparameters and heuristics that generalise well and helped with debugging # At the time of development (12th Dec 2023) cellcharter doesn't have release tags. The code used for development can @@ -10,6 +10,7 @@ # git clone https://github.com/CSOgroup/cellcharter.git # cd cellcharter # git checkout -b [new-branch-name] 2b15549968559d9023d1fc92f9798a8c38dffcac +# pip install -e . import argparse import json @@ -19,7 +20,7 @@ import squidpy as sq import cellcharter as cc -# TODO: template arguments are strict. Need to readjust here. +# TODO: The arguments don't fully follow the template right now. This needs to be adjusted after final input args decisions + tests. parser = argparse.ArgumentParser(description="Calculate cellcharter's purity metric that measures if cells assigned to other connected components are located within the boundaries of given components.") parser.add_argument("--coordinates", help="Path to coordinates (as tsv).", required=True) parser.add_argument("-l", "--labels", help="Labels from domain clustering.", required=True) @@ -31,6 +32,7 @@ ) parser.add_argument("-o", "--out_file", help="Output file.", required=True) parser.add_argument("--fig_path", help="File path for plot to check if components look reasonable.", required=False, default=False) #TODO: Delete +parser.add_argument("--connectivity", help="Config how to calculate connectivity.", required=False, default="neighs") SUPPORTED_TECHNOLOGIES = ["CODEX", "CosMx", "MERFISH", "IMC", "Visium", "RNA + ATAC"] # for each technology we need to specify how to compute the graph connectivity and how to restrict connected components @@ -142,6 +144,7 @@ def get_alpha_start(adata, factor = 0.05): # generalisation. def get_n_colors(n): + """Get a list of n different colors""" from random import shuffle # Generate a slightly larger list of colors to account for the removal of black and white @@ -161,7 +164,7 @@ def get_n_colors(n): import matplotlib.pyplot as plt def plot_components(adata, save=False): - """ Plot components and their boundaries. + """ Plot connected components and their boundaries colored by spatial domains. """ n = len(adata.obs["spatial_cluster"].dropna().unique()) @@ -175,12 +178,17 @@ def plot_components(adata, save=False): df['color'] = 'lightgray' df.loc[~df['spatial_cluster'].isnull(),'color'] = df.loc[~df['spatial_cluster'].isnull(),'spatial_cluster'].map(label_to_color) - fig = plt.figure(figsize=(10,10)) + fig, axs = plt.subplots(1,2,figsize=(20,10)) + plt.sca(axs[0]) for i, polygon1 in adata.uns[f"shape_component"]["boundary"].items(): plt.plot(*polygon1.exterior.xy, color=label_to_color[component_to_spatial_cluster[i]]) - plt.scatter(adata.obsm["spatial"][:,0],adata.obsm["spatial"][:,1],c=adata.obs['color'], s=1) + plt.sca(axs[1]) + for i, polygon1 in adata.uns[f"shape_component"]["boundary"].items(): + plt.plot(*polygon1.exterior.xy, color=label_to_color[component_to_spatial_cluster[i]]) + plt.title(f"n components = {len(adata.uns[f'shape_component']['boundary'])}") + if save: fig.savefig(save) @@ -195,12 +203,13 @@ def script(): coord_file = args.coordinates label_file = args.labels fig_path = args.fig_path #TODO: Delete? + connectivity = args.connectivity #TODO: Delete if args.config is not None: config_file = args.config with open(config_file) as f: config = json.load(f) - sample_file = config["sample_file"] if "sample_file" in config else None + sample_file = config["sample_file"] if "sample_file" in config else None #TODO: Is this the best place to define the path? technology = config["technology"] if "technology" in config else None if technology not in SUPPORTED_TECHNOLOGIES: raise ValueError(f"Technology {technology} not supported. Supported technologies are {SUPPORTED_TECHNOLOGIES}.") @@ -230,18 +239,21 @@ def script(): of all distances between connected nodes. """ if technology in ["CODEX", "CosMx", "MERFISH", "IMC"]: - adata_domains = adata[adata.obs["spatial_cluster"].notna()].copy() - - # Initial version from cellcharter - #sq.gr.spatial_neighbors(adata, delaunay=True, coord_type='generic') - #cc.gr.remove_long_links(adata, distance_percentile = 99.0) - # Alternative cellcharter like version that should be more robust than percentiles - #sq.gr.spatial_neighbors(adata_domains, delaunay=True, coord_type='generic') - #remove_long_links(adata_domains, distance_percentile=None, mean_distance_factor=2.0) - - # Simple n neighbors version (danger: long range connections are not removed) - sq.gr.spatial_neighbors(adata_domains, delaunay=False, n_neighs=20, coord_type='generic') + #TODO: this should be reduced to one version (i.e. removing the `connectivity` param), needs to be tested on multiple datasets + #NOTE: One difference to the initial cellcharter implementation: we only consider cells that are assigned to a spatial domain + adata_domains = adata[adata.obs["spatial_cluster"].notna()].copy() + if connectivity == "delaunay_perc": + # Initial version from cellcharter + sq.gr.spatial_neighbors(adata_domains, delaunay=True, coord_type='generic') + cc.gr.remove_long_links(adata_domains, distance_percentile = 99.0) + elif connectivity == "delaunay_dist": + # Alternative cellcharter like version that should be more robust than percentiles + sq.gr.spatial_neighbors(adata_domains, delaunay=True, coord_type='generic') + remove_long_links(adata_domains, distance_percentile=None, mean_distance_factor=2.0) + elif connectivity == "neighs": + # Simple n neighbors version (danger: long range connections are not removed) + sq.gr.spatial_neighbors(adata_domains, delaunay=False, n_neighs=20, coord_type='generic') elif technology in ["Visium"]: sq.gr.spatial_neighbors(adata, n_neighs=6, coord_type="grid") @@ -262,6 +274,17 @@ def script(): raise NotImplementedError(f"Min Nr of cells for connected component identification for the given technology {technology} is not implemented.") # Compute boundaries of domain clusters (saved in adata.uns[f"shape_component"]["boundary"]} + """ + From cellcharter paper: + Shape characterization + To determine the boundaries of a cluster component, we developed a new technique based on alpha shapes86. For each + component, we computed the alpha shape with a starting value of alpha that depends on the resolution of the data. + If the alpha value was too small, the alpha shape of the component would yield multiple separated polygons. In such + cases, we doubled the alpha value and recomputed the alpha shape. This procedure is iterated until an alpha shape + composed of a single polygon is obtained. + + In this script we automated the setting of the minimum alpha: `get_alpha_start` + """ a_start = get_alpha_start(adata) cc.tl.boundaries(adata, cluster_key = "component", min_hole_area_ratio = 0.1, alpha_start = a_start, copy = False)