Merge pull request #878 from deeptools/maintenance_update

Maintenance update
deeptools · Nov 17, 2023 · 879650f · 879650f
2 parents 20be921 + 0b8a79d
commit 879650f
Show file tree

Hide file tree

Showing 42 changed files with 265 additions and 210 deletions.
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
@@ -7,7 +7,11 @@ assignees: ''
 
 ---
 
-**Welcome to the HiCExplorer GitHub repository! Before opening the issue please check
+**Welcome to the HiCExplorer GitHub repository!**
+
+If your issue concerns `HiCPlotTADs`, please post it to the [pyGenomeTracks GitHub repo](https://github.com/deeptools/pyGenomeTracks/issues/new/choose).
+
+**Before opening the issue please check
  that the following requirements are met :**
 
 - [ ] Search whether this issue (or a similar issue) has been solved before using the search tab above. Link the previous issue if appropriate below.

diff --git a/README.rst b/README.rst
@@ -37,6 +37,9 @@ The  `scHiCExplorer <https://github.com/joachimwolff/schicexplorer>`_.
 Citation:
 ^^^^^^^^^
 
+Joachim Wolff, Rolf Backofen, Björn Grüning.
+**Loop detection using Hi-C data with HiCExplorer**, GigaScience, Volume 11, 2022, giac061, https://doi.org/10.1093/gigascience/giac061
+
 Joachim Wolff, Leily Rabbani, Ralf Gilsbach, Gautier Richard, Thomas Manke, Rolf Backofen, Björn A Grüning.
 **Galaxy HiCExplorer 3: a web server for reproducible Hi-C, capture Hi-C and single-cell Hi-C data analysis, quality control and visualization**, Nucleic Acids Research, Volume 48, Issue W1, 02 July 2020, Pages W177–W184, https://doi.org/10.1093/nar/gkaa220
 

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -9,12 +9,13 @@ jobs:
     vmImage: 'ubuntu-latest'
   strategy:
     matrix:
-      Python36:
-        python.version: '3.6'
-      Python37:
-        python.version: '3.7'
       Python38:
         python.version: '3.8'
+      Python39:
+        python.version: '3.9'
+      Python310:
+        python.version: '3.10'
+
 
   steps:
   - bash: |
@@ -23,14 +24,16 @@ jobs:
     displayName: Add conda to PATH
   - bash: |
       conda config --set always_yes yes --set changeps1 no
-      conda info -a
-      conda create -n hicexplorer --yes -c conda-forge -c bioconda python=$(python.version) --file requirements.txt
+      conda install mamba --yes -c conda-forge
+      hash -r
+      mamba info -a
+      mamba create -n hicexplorer --yes -c conda-forge -c bioconda python=$(python.version) --file requirements.txt
       source activate hicexplorer
-      conda install --yes -c conda-forge -c bioconda pytest flake8 pytest-xdist pytest-forked
-      conda install --yes -c conda-forge -c bioconda nose
-      conda install --yes pathlib
-      conda install --yes -c defaults -c conda-forge -c bioconda configparser
-      python setup.py install
+      mamba install --yes -c conda-forge -c bioconda pytest flake8 pytest-xdist pytest-forked
+      mamba install --yes -c conda-forge -c bioconda nose
+      mamba install --yes pathlib
+      mamba install --yes -c defaults -c conda-forge -c bioconda configparser
+      pip install .
     displayName: installing dependencies
   - script: |
       source activate hicexplorer
@@ -44,22 +47,25 @@ jobs:
   - script: |
       source activate hicexplorer
       py.test hicexplorer/test/general/ --doctest-modules --capture=sys -n 4  --ignore=hicexplorer/test/general/test_hicTADClassifier.py --ignore=hicexplorer/test/general/test_hicTrainTADClassifier.py
+    displayName: pytest
+  - script: |
+      source activate hicexplorer
       py.test hicexplorer/test/general/test_hicTADClassifier.py
       py.test hicexplorer/test/general/test_hicTrainTADClassifier.py
-    displayName: pytest
+    displayName: pytest_tad_classifier
 
 - job: 'OSX'
   timeoutInMinutes: 0
   pool:
-    vmImage: 'macOS-10.14'
+    vmImage: 'macOS-latest'
   strategy:
     matrix:
-      Python36:
-        python.version: '3.6'
-      Python37:
-        python.version: '3.7'
       Python38:
         python.version: '3.8'
+      Python39:
+        python.version: '3.9'
+      Python310:
+        python.version: '3.10'
 
   steps:
   - bash: |
@@ -75,7 +81,7 @@ jobs:
       conda install --yes -c conda-forge -c bioconda nose
       conda install --yes pathlib
       conda install --yes -c defaults -c conda-forge -c bioconda configparser
-      python setup.py install
+      pip install .
     displayName: installing dependencies
   - script: |
       source activate hicexplorer
@@ -89,6 +95,9 @@ jobs:
   - script: |
       source activate hicexplorer
       py.test hicexplorer/test/general/ --doctest-modules --capture=sys -n 4  --ignore=hicexplorer/test/general/test_hicTADClassifier.py --ignore=hicexplorer/test/general/test_hicTrainTADClassifier.py
+    displayName: pytest
+  - script: |
+      source activate hicexplorer
       py.test hicexplorer/test/general/test_hicTADClassifier.py
       py.test hicexplorer/test/general/test_hicTrainTADClassifier.py
-    displayName: pytest
+    displayName: pytest_tad_classifier
diff --git a/docs/content/News.rst b/docs/content/News.rst
@@ -2,6 +2,14 @@ News and Developments
 =====================
 
 
+Release 3.7.3
+-------------
+**17 November 2023**
+
+- Maintenance update for HiCExplorer to keep up to date with APIs of dependencies
+- Add the polarization ratio to the output of hicCompartmentalization. Thanks @contessoto.
+
+
 Release 3.7.2
 -------------
 **1 October 2021**

diff --git a/hicexplorer/_version.py b/hicexplorer/_version.py
@@ -2,4 +2,4 @@
 # This file is originally generated from Git information by running 'setup.py
 # version'. Distribution tarballs contain a pre-generated copy of this file.
 
-__version__ = '3.7.2'
+__version__ = '3.7.3'
diff --git a/hicexplorer/chicExportData.py b/hicexplorer/chicExportData.py
@@ -148,7 +148,8 @@ def exportData(pFileList, pArgs, pViewpointObject, pDecimalPlace, pChromosomeSiz
 
                         file_content_string = header_information
                         for key in key_list:
-                            file_content_string += '\t'.join('{:.{decimal_places}f}'.format(x, decimal_places=pDecimalPlace) if isinstance(x, np.float) else str(x) for x in data[1][key]) + '\n'
+                            file_content_string += '\t'.join('{:.{decimal_places}f}'.format(x, decimal_places=pDecimalPlace) if isinstance(x, float) else str(x) for x in data[1][key]) + '\n'
+                        # breakpoint()
                     else:
                         for key in key_list:
                             chromosome_name.append(str(data[1][key][0]))
@@ -224,7 +225,9 @@ def exportData(pFileList, pArgs, pViewpointObject, pDecimalPlace, pChromosomeSiz
                     line_content, data = pViewpointObject.readAggregatedFileHDF(pArgs.file, sample)
                     file_content_string = header_information
                     for line in line_content:
-                        file_content_string += '\t'.join('{:.{decimal_places}f}'.format(x, decimal_places=pDecimalPlace) if isinstance(x, np.float) else str(x) for x in line) + '\n'
+                        file_content_string += '\t'.join('{:.{decimal_places}f}'.format(x, decimal_places=pDecimalPlace) if isinstance(x, float) else str(x) for x in line) + '\n'
+                    # breakpoint()
+
                     file_content_list.append(file_content_string)
 
                     file_name = '_'.join(sample) + '_' + pFileType + '.txt'
@@ -241,7 +244,9 @@ def exportData(pFileList, pArgs, pViewpointObject, pDecimalPlace, pChromosomeSiz
                     file_content_string = header_information
 
                     for line in item:
-                        file_content_string += '\t'.join('{:.{decimal_places}f}'.format(x, decimal_places=pDecimalPlace) if isinstance(x, np.float) else str(x) for x in line) + '\n'
+                        file_content_string += '\t'.join('{:.{decimal_places}f}'.format(x, decimal_places=pDecimalPlace) if isinstance(x, float) else str(x) for x in line) + '\n'
+                    # breakpoint()
+
                     file_content_list.append(file_content_string)
                     file_name = '_'.join(file) + '_' + item_classification[i] + '_' + pFileType + '.txt'
                     file_list.append(file_name)

diff --git a/hicexplorer/hicAdjustMatrix.py b/hicexplorer/hicAdjustMatrix.py
@@ -4,7 +4,7 @@
 import argparse
 from hicmatrix import HiCMatrix as hm
 from hicexplorer._version import __version__
-from hicmatrix.HiCMatrix import check_cooler
+from hicexplorer.utilities import check_cooler
 import numpy as np
 import cooler
 import logging
@@ -132,6 +132,7 @@ def adjustMatrix(pArgs):
         if len(genomic_regions) == 0:
             log.error('No valid chromosome given. Available: {}'.format(chromosomes_list))
             exit(1)
+
         matrix_indices_regions = []
         for region in genomic_regions:
             log.debug('region {}'.format(region))

diff --git a/hicexplorer/hicAggregateContacts.py b/hicexplorer/hicAggregateContacts.py
@@ -8,7 +8,7 @@
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
-import matplotlib.cm as cm
+from matplotlib import colormaps as cm
 from mpl_toolkits.mplot3d import Axes3D
 
 # from scipy.cluster.vq import vq, kmeans
@@ -515,7 +515,7 @@ def compute_clusters(updated_info, k, method="kmeans", how='full', max_deviation
             # shape = (num_submatrices, submatrix.shape[0] * submatrix.shape[1]
             # In other words, each submatrix is converted into a row of the matrix
             submat_vectors.append(submatrix.reshape((1, shape[0] * shape[1])))
-    matrix = np.vstack(submat_vectors)
+    matrix = np.asarray(np.vstack(submat_vectors))
     if how == 'diagonal':
         assert matrix.shape == (len(updated_info["submatrices"]), shape[0])
     elif how == 'center':
@@ -600,7 +600,7 @@ def cluster_matrices(agg_info, k, method='kmeans', how='full', perChr=False, max
                 updated_info[chrom1] = {"coords": full_coords, "centers": agg_info["agg_center_values"][chrom1][chrom2],
                                         "submatrices": agg_info["agg_matrix"][chrom1][chrom2],
                                         "clustered_dict": [], "diagonal": agg_info["agg_diagonals"][chrom1][chrom2]}
-                assert(chrom1 == chrom2)
+                assert (chrom1 == chrom2)
                 log.info("Length of entry on chr {}: {}".format(chrom1, len(agg_info["agg_matrix"][chrom1][chrom2])))
                 if len(agg_info["agg_matrix"][chrom1][chrom2]) < k:
                     log.info("number of the submatrices on chromosome {} is less than {}. Clustering is skipped.".format(chrom1, k))
@@ -660,7 +660,7 @@ def plot_aggregated_contacts(clustered_info, num_clusters, M_half, args):
     log.debug("vmax: {}, vmin: {}".format(vmax, vmin))
     chrom_avg = OrderedDict()
     for idx, (chrom1, v1) in enumerate(clustered_info.items()):
-        assert(v1 != {})
+        assert (v1 != {})
         if chrom1 not in chrom_avg.keys():
             chrom_avg[chrom1] = []
 
@@ -925,7 +925,7 @@ def main(args=None):
         exit("No susbmatrix found to be aggregated.")
 
     if args.kmeans is not None:
-        assert(args.kmeans > 1)
+        assert (args.kmeans > 1)
         if args.perChr == True:
             clustered_info = cluster_matrices(agg_info,
                                               k=args.kmeans, method='kmeans', how=args.howToCluster,
@@ -938,7 +938,7 @@ def main(args=None):
                                               keep_outlier=args.keep_outlier)
         num_clusters = args.kmeans
     elif args.hclust is not None:
-        assert(args.hclust > 1)
+        assert (args.hclust > 1)
         log.info("Performing hierarchical clustering."
                  "Please note that it might be very slow for large datasets.\n")
         if args.perChr == True:

diff --git a/hicexplorer/hicAverageRegions.py b/hicexplorer/hicAverageRegions.py
@@ -7,7 +7,7 @@
 import logging
 log = logging.getLogger(__name__)
 import numpy as np
-from scipy.sparse import csr_matrix, save_npz, lil_matrix
+from scipy.sparse import csr_matrix, save_npz, lil_matrix, coo_matrix
 
 
 def parse_arguments(args=None):
@@ -196,11 +196,13 @@ def main(args=None):
         if orientation is None or orientation == '+':
             summed_matrix[_start:_end, _start:_end] += hic_ma.matrix[start:end, start:end]
         elif orientation == '-':
-
             summed_matrix[_start:_end, _start:_end] += hic_ma.matrix[start:end, start:end].T
     summed_matrix /= count_matrix
-    summed_matrix = np.array(summed_matrix)
-    data = summed_matrix[np.nonzero(summed_matrix)]
+
+    summed_matrix = coo_matrix(summed_matrix)
+
+    data = summed_matrix.data
+
     row = np.nonzero(summed_matrix)[0]
     col = np.nonzero(summed_matrix)[1]
     summed_matrix = csr_matrix((data, (row, col)), shape=(dimensions_new_matrix, dimensions_new_matrix))

diff --git a/hicexplorer/hicCompartmentalization.py b/hicexplorer/hicCompartmentalization.py
@@ -99,7 +99,7 @@ def count_interactions(obs_exp, pc1, quantiles_number, offset):
     number_of_bins = np.zeros((quantiles_number, quantiles_number))
     if offset:
         for dist in offset:
-            assert(dist >= 0)
+            assert (dist >= 0)
             indices = np.arange(0, obs_exp.matrix.shape[0] - dist)
             obs_exp.matrix[indices, indices + dist] = np.nan
             obs_exp.matrix[indices + dist, indices] = np.nan
@@ -220,3 +220,4 @@ def main(args=None):
         np.savez(args.outputMatrix, [matrix for matrix in output_matrices])
     plot_polarization_ratio(
         polarization_ratio, args.outputFileName, labels, args.quantile)
+    np.savetxt(args.outputFileName + '_' + 'dat', polarization_ratio)
diff --git a/hicexplorer/hicCorrectMatrix.py b/hicexplorer/hicCorrectMatrix.py
@@ -368,7 +368,7 @@ def __init__(self, points):
         diff = np.sum((points - self.median), axis=-1)
 
         self.med_abs_deviation = np.median(np.abs(diff))
-        self.modified_z_score = self.mad_b_value * diff / self.med_abs_deviation
+        self.modified_z_score = np.multiply(self.mad_b_value, np.divide(diff, self.med_abs_deviation))
 
     def get_motified_zscores(self):
 
@@ -665,7 +665,7 @@ def main(args=None):
         if args.sequencedCountCutoff and 0 < args.sequencedCountCutoff < 1:
             chrom, _, _, coverage = zip(*ma.cut_intervals)
 
-            assert type(coverage[0]) == np.float64
+            assert type(coverage[0]) is np.float64
 
             failed_bins = np.flatnonzero(
                 np.array(coverage) < args.sequencedCountCutoff)
@@ -703,7 +703,7 @@ def main(args=None):
                 correction_factors.append(_corr_factors)
             else:
                 # Set the kr matrix along with its correction factors vector
-                assert(args.correctionMethod == 'KR')
+                assert (args.correctionMethod == 'KR')
                 log.debug("Loading a float sparse matrix for KR balancing")
                 kr = kr_balancing(chr_submatrix.shape[0],
                                   chr_submatrix.shape[1],
@@ -729,7 +729,7 @@ def main(args=None):
                 ma.matrix, args)
             ma.setMatrixValues(corrected_matrix)
         else:
-            assert(args.correctionMethod == 'KR')
+            assert (args.correctionMethod == 'KR')
             log.debug("Loading a float sparse matrix for KR balancing")
             kr = kr_balancing(ma.matrix.shape[0], ma.matrix.shape[1],
                               ma.matrix.count_nonzero(), ma.matrix.indptr.astype(np.int64, copy=False),

diff --git a/hicexplorer/hicCorrelate.py b/hicexplorer/hicCorrelate.py
@@ -16,6 +16,7 @@
 # for plotting
 from matplotlib import use as mplt_use
 import matplotlib as mpl
+from matplotlib import colormaps as cm
 
 mplt_use('Agg')
 
@@ -165,7 +166,7 @@ def plot_correlation(corr_matrix, labels, plot_filename, vmax=None,
                            link_color_func=lambda k: 'black')
     axdendro.set_xticks([])
     axdendro.set_yticks([])
-    cmap = plt.get_cmap(colormap)
+    cmap = cm.get_cmap(colormap)
 
     # this line simply makes a new cmap, based on the original
     # colormap that goes from 0.0 to 0.9