From d4416aa82ff9872f047967ae129a320a482a3bd5 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 6 Feb 2024 12:17:27 +0100 Subject: [PATCH 01/26] Prepare next release 5.8.3-SNAPSHOT --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 289046b4cc..0ac6807a6d 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 5.8.3-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index ce235ddfa6..39295305ab 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 5.8.3-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 3e68b37513..aeacc5f42f 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 5.8.3-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index ffb98718f8..1f0cab6002 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 5.8.3-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 691fc60712..805d371305 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 5.8.3-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index b79658f109..0d8d1b00a8 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.2 + 5.8.3-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 4.12.0 - 2.12.1 + 4.12.1-SNAPSHOT + 2.12.2-SNAPSHOT 0.1.0 2.11.4 1.9.13 From e49d9948431127a0bb548d425b3f1f18cad3408a Mon Sep 17 00:00:00 2001 From: imedina Date: Mon, 11 Mar 2024 02:12:04 +0000 Subject: [PATCH 02/26] Implement dbSNP file download as 'variation' TASK-5794 --- .../cli/admin/executors/DownloadCommandExecutor.java | 6 +++--- .../cellbase/core/config/DownloadProperties.java | 10 ++++++++++ cellbase-core/src/main/resources/configuration.yml | 4 ++++ .../main/java/org/opencb/cellbase/lib/EtlCommons.java | 2 +- .../cellbase/lib/download/ClinicalDownloadManager.java | 7 +------ .../org/opencb/cellbase/lib/download/Downloader.java | 5 +++++ 6 files changed, 24 insertions(+), 10 deletions(-) diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java index f8197e6558..abb0629374 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java @@ -69,9 +69,9 @@ public void execute() { case EtlCommons.GENE_DATA: downloadFiles.addAll(downloader.downloadGene()); break; -// case EtlCommons.VARIATION_DATA: -// downloadManager.downloadVariation(); -// break; + case EtlCommons.VARIATION_DATA: + downloadFiles.addAll(downloader.downloadVariation()); + break; case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: downloadFiles.addAll(downloader.downloadCaddScores()); break; diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index ee4216f560..9a097fd202 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -51,6 +51,7 @@ public class DownloadProperties { private URLProperties clinvarSummary; private URLProperties clinvarVariationAllele; private URLProperties clinvarEfoTerms; + private URLProperties dbSNP; private URLProperties iarctp53; private URLProperties docm; private URLProperties docmVersion; @@ -263,6 +264,15 @@ public DownloadProperties setClinvarEfoTerms(URLProperties clinvarEfoTerms) { return this; } + public URLProperties getDbSNP() { + return dbSNP; + } + + public DownloadProperties setDbSNP(URLProperties dbSNP) { + this.dbSNP = dbSNP; + return this; + } + public URLProperties getIarctp53() { return iarctp53; } diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 0f8d199118..bbeee761bb 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -117,6 +117,9 @@ download: host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz clinvarEfoTerms: host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv + dbSNP: + host: https://ftp.ncbi.nih.gov/snp/latest_release/VCF/GCF_000001405.40.gz + version: "156" iarctp53: host: http://p53.iarc.fr/ajax/Zipper.ashx docm: @@ -197,6 +200,7 @@ species: - refseq - regulation - repeats + - variation - variation_functional_score - splice_score shards: diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 4396f0c2f1..0ace30987c 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -63,7 +63,7 @@ public class EtlCommons { public static final String IARCTP53_FILE = "IARC-TP53.zip"; public static final String GWAS_FILE = "gwas_catalog.tsv"; public static final String COSMIC_FILE = "CosmicMutantExport.tsv.gz"; - public static final String DBSNP_FILE = "All.vcf.gz"; + public static final String DBSNP_FILE = "GCF_000001405.40.gz"; public static final String STRUCTURAL_VARIANTS_DATA = "svs"; public static final String REPEATS_DATA = "repeats"; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java index 580a855a19..eb1f28db2d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java @@ -55,12 +55,7 @@ public List download() throws IOException, InterruptedException { public List downloadClinical() throws IOException, InterruptedException { if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { -// if (assemblyConfiguration.getName() == null) { -// throw new ParameterException("Assembly must be provided for downloading clinical variants data." -// + " Please, specify either --assembly GRCh37 or --assembly GRCh38"); -// } - - logger.info("Downloading clinical information ..."); + logger.info("Downloading clinical variant information ..."); String url; List downloadFiles = new ArrayList<>(); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java index 17022cae4b..0deb62386b 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java @@ -64,6 +64,11 @@ public List downloadConservation() throws IOException, CellBaseExc return manager.downloadConservation(); } + public List downloadVariation() throws IOException, CellBaseException, InterruptedException { + VariationDownloadManager manager = new VariationDownloadManager(species, assembly, outputDirectory, configuration); + return manager.download(); + } + public List downloadClinicalVariants() throws IOException, CellBaseException, InterruptedException { ClinicalDownloadManager manager = new ClinicalDownloadManager(species, assembly, outputDirectory, configuration); return manager.download(); From 26a008a7ceafbb0c0b6e45d3f8393c584d781644 Mon Sep 17 00:00:00 2001 From: imedina Date: Mon, 11 Mar 2024 02:12:19 +0000 Subject: [PATCH 03/26] Implement dbSNP file download as 'variation' TASK-5794 --- .../download/VariationDownloadManager.java | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java new file mode 100644 index 0000000000..5e497675e7 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java @@ -0,0 +1,61 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.download; + +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.lib.EtlCommons; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; + +public class VariationDownloadManager extends AbstractDownloadManager { + + private static final String DBSNP_NAME = "dbSNP"; + public VariationDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) + throws IOException, CellBaseException { + super(species, assembly, targetDirectory, configuration); + } + + @Override + public List download() throws IOException, InterruptedException { + return Collections.singletonList(downloadCaddScores()); + } + + public DownloadFile downloadCaddScores() throws IOException, InterruptedException { + if (!speciesHasInfoToDownload(speciesConfiguration, "variation")) { + return null; + } + if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { + logger.info("Downloading dbSNP scores information ..."); + + Path variation = downloadFolder.resolve("variation"); + Files.createDirectories(variation); + + DownloadProperties.URLProperties dbSNP = configuration.getDownload().getDbSNP(); + String url = dbSNP.getHost(); + saveVersionData(EtlCommons.VARIATION_DATA, DBSNP_NAME, dbSNP.getVersion(), getTimeStamp(), + Collections.singletonList(url), variation.resolve("dbSnpVersion.json")); + return downloadFile(url, variation.resolve(EtlCommons.DBSNP_FILE).toString()); + } + return null; + } +} From a267dc3c5cd61f32d68a74a299f07eb61ba1ca8e Mon Sep 17 00:00:00 2001 From: imedina Date: Mon, 11 Mar 2024 03:13:57 +0000 Subject: [PATCH 04/26] Implement VariationBuilder TASK-5794 --- .../admin/executors/BuildCommandExecutor.java | 11 ++ .../lib/builders/CaddScoreBuilder.java | 2 +- .../lib/builders/VariationBuilder.java | 157 ++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java index 8c0d477023..35814cabe3 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java @@ -132,6 +132,9 @@ public void execute() { case EtlCommons.REFSEQ_DATA: parser = buildRefSeq(); break; + case EtlCommons.VARIATION_DATA: + parser = buildVariation(); + break; case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: parser = buildCadd(); break; @@ -275,6 +278,14 @@ private CellBaseBuilder buildRefSeq() { return new RefSeqGeneBuilder(refseqFolderPath, speciesConfiguration, serializer); } + private CellBaseBuilder buildVariation() { + Path variationFunctionalScorePath = downloadFolder.resolve("variation"); + copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("dbSnpVersion.json"))); + Path variationFilePath = variationFunctionalScorePath.resolve(EtlCommons.DBSNP_FILE); + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "dbSNP"); + return new VariationBuilder(variationFilePath, serializer); + } + private CellBaseBuilder buildCadd() { Path variationFunctionalScorePath = downloadFolder.resolve("variation_functional_score"); copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("caddVersion.json"))); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java index b593f44901..f4c6c861fd 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java @@ -42,7 +42,7 @@ public CaddScoreBuilder(Path caddFilePath, CellBaseSerializer serializer) { super(serializer); this.caddFilePath = caddFilePath; - logger = LoggerFactory.getLogger(ConservationBuilder.class); + logger = LoggerFactory.getLogger(CaddScoreBuilder.class); } /* Example: diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java new file mode 100644 index 0000000000..e00137505b --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java @@ -0,0 +1,157 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.builders; + +import org.opencb.biodata.models.core.Snp; +import org.opencb.biodata.models.core.SnpAnnotation; +import org.opencb.biodata.models.variant.avro.PopulationFrequency; +import org.opencb.cellbase.core.serializer.CellBaseSerializer; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +/** + * Created by imedina on 06/11/15. + */ +public class VariationBuilder extends CellBaseBuilder { + + private Path dbSnpFilePath; + + private static final int CHUNK_SIZE = 1000; + private static final int DECIMAL_RESOLUTION = 100; + + public VariationBuilder(Path dbSnpFilePath, CellBaseSerializer serializer) { + super(serializer); + this.dbSnpFilePath = dbSnpFilePath; + + logger = LoggerFactory.getLogger(VariationBuilder.class); + } + + /* Example: + ## dbSNP 156 + #CHROM POS ID REF ALT QUAL FILTER INFO + NC_000001.11 926003 rs1329301928 C A,T . . RS=1329301928;dbSNPBuildID=151;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;NSM;R5;GNO; + FREQ=Estonian:0.9998,0.0002232,.|TOMMO:0.9999,.,0.0001062|dbGaP_PopFreq:0.9999,5.4e-05,0; + CLNVI=.,.,;CLNORIGIN=.,.,1;CLNSIG=.,.,0;CLNDISDB=.,.,MedGen:CN517202;CLNDN=.,.,not_provided;CLNREVSTAT=.,.,single; + CLNACC=.,.,RCV001929748.1;CLNHGVS=NC_000001.11:g.926003=,NC_000001.11:g.926003C>A,NC_000001.11:g.926003C>T + NC_000001.11 925952 rs1640863258 G A . . RS=1640863258;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;NSM;R5;CLNVI=.,;CLNORIGIN=.,1;CLNSIG=.,0;CLNDISDB=.,MedGen:CN517202; + CLNDN=.,not_provided;CLNREVSTAT=.,single;CLNACC=.,RCV001318826.4;CLNHGVS=NC_000001.11:g.925952=,NC_000001.11:g.925952G>A + NC_000001.11 925953 rs1349221494 G A,T . . RS=1349221494;dbSNPBuildID=151;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;SYN;R5;GNO; + FREQ=GnomAD:1,1.426e-05,.|GnomAD_exomes:1,.,4.008e-06|TOPMED:1,3.778e-06,.|dbGaP_PopFreq:1,0,3.124e-05 + NC_000001.11 925956 rs1342334044 C T . . RS=1342334044;dbSNPBuildID=155;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;SYN;R5;GNO; + FREQ=TOPMED:1,1.133e-05|dbGaP_PopFreq:1,0; + CLNVI=.,;CLNORIGIN=.,1;CLNSIG=.,3;CLNDISDB=.,MedGen:CN517202;CLNDN=.,not_provided;CLNREVSTAT=.,single;CLNACC=.,RCV002170030.3; + CLNHGVS=NC_000001.11:g.925956=,NC_000001.11:g.925956C>T + */ + @Override + public void parse() throws Exception { + FileUtils.checkPath(dbSnpFilePath); + + BufferedReader bufferedReader = FileUtils.newBufferedReader(dbSnpFilePath); + + String line; + String[] fields; + + String currentChromosome = null; + String chromosome = null; + int position; + String id; + String ref; + String[] alt; + String info; + Map infoMap; + List flags; + + while ((line = bufferedReader.readLine()) != null) { + if (!line.startsWith("#")) { + fields = line.split("\t"); + + // this only happens the first time, when we start reading the file + if (chromosome == null) { + logger.info("Parsing chr {} ", fields[0]); + currentChromosome = fields[0]; + chromosome = fields[0].split("\\.")[0]; + } + + position = Integer.parseInt(fields[1]); + id = fields[2]; + ref = fields[3]; + alt = fields[4].split(","); + info = fields[7]; + + String[] infoFields = info.split(";"); + flags = new ArrayList<>(); + SnpAnnotation snpAnnotation = new SnpAnnotation(); + for (String infoField : infoFields) { + String[] infoKeyValue = infoField.split("="); + switch (infoKeyValue[0]) { + case "GENEINFO": + snpAnnotation.setGene(infoKeyValue[1].split(":")[0]); + break; + case "FREQ": + String[] studies = infoKeyValue[1].split("\\|"); + List populationFrequencies = new ArrayList<>(); + for (String study : studies) { + String[] freqFields = study.split("[:,]"); + for (int i = 0; i < alt.length; i++) { + if (!freqFields[i + 2].equals(".")) { + PopulationFrequency populationFrequency = new PopulationFrequency(); + populationFrequency.setStudy(freqFields[0]); + System.out.println(freqFields[i + 1]); + populationFrequency.setRefAlleleFreq(Float.parseFloat(freqFields[i + 1])); + // freqFields[1] is the 'ref' allele ALT freq + populationFrequency.setAltAllele(alt[i]); + populationFrequency.setAltAlleleFreq(Float.parseFloat(freqFields[i + 2])); + populationFrequencies.add(populationFrequency); + } + } + } + snpAnnotation.setPopulationFrequencies(populationFrequencies); + break; + default: + if (infoKeyValue.length == 1) { + flags.add(infoKeyValue[0]); + } + } + } + snpAnnotation.setFlags(flags); + + if (!currentChromosome.equals(fields[0])) { + logger.info("Parsing chr {} ", fields[0]); + + } + + Snp snp = new Snp(id, chromosome, position, ref, Arrays.asList(alt), "SNV", "dbSNP", "156", snpAnnotation); + serializer.serialize(snp); + } + } + + serializer.close(); + bufferedReader.close(); + logger.info("Parsing finished."); + } +} From c583eddc4ca69f6e1bcf9234942fcee281cb937e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Mar 2024 09:39:16 +0100 Subject: [PATCH 05/26] lib: minor changes when downloading dbSNP data, #TASK-5815, #TASK-5789 --- .../app/cli/admin/AdminCliOptionsParser.java | 12 +++++++--- .../org/opencb/cellbase/lib/EtlCommons.java | 3 +++ .../download/VariationDownloadManager.java | 22 +++++++++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java index 6049ef9b4b..4a5f2c085f 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java @@ -19,6 +19,7 @@ import com.beust.jcommander.*; import org.opencb.cellbase.app.cli.CliOptionsParser; import org.opencb.cellbase.core.api.key.ApiKeyQuota; +import org.opencb.cellbase.lib.EtlCommons; import java.util.HashMap; import java.util.List; @@ -74,6 +75,7 @@ public AdminCliOptionsParser() { jCommander.addCommand("validate", validationCommandOptions); } + @Override public void parse(String[] args) throws ParameterException { jCommander.parse(args); } @@ -87,9 +89,13 @@ public class DownloadCommandOptions { @ParametersDelegate public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download: genome, gene, " - + "variation, variation_functional_score, regulation, protein, conservation, " - + "clinical_variants, repeats, svs, pubmed and 'all' to download everything", required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download:" + + EtlCommons.GENOME_DATA + ", " + EtlCommons.GENE_DATA + ", " + EtlCommons.VARIATION_DATA + ", " + + EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA + ", " + EtlCommons.MISSENSE_VARIATION_SCORE_DATA + ", " + + EtlCommons.REGULATION_DATA + ", " + EtlCommons.PROTEIN_DATA + ", " + EtlCommons.CONSERVATION_DATA + ", " + + EtlCommons.CLINICAL_VARIANTS_DATA + ", " + EtlCommons.REPEATS_DATA + ", " + EtlCommons.OBO_DATA + ", " + + EtlCommons.PUBMED_DATA + ", " + EtlCommons.PHARMACOGENOMICS_DATA + "; and 'all' to download everything", + required = true, arity = 1) public String data; @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 0ace30987c..26c595598f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -63,7 +63,10 @@ public class EtlCommons { public static final String IARCTP53_FILE = "IARC-TP53.zip"; public static final String GWAS_FILE = "gwas_catalog.tsv"; public static final String COSMIC_FILE = "CosmicMutantExport.tsv.gz"; + @Deprecated public static final String DBSNP_FILE = "GCF_000001405.40.gz"; + public static final String DBSNP_NAME = "dbSNP"; + public static final String DBSNP_VERSION_FILENAME = DBSNP_NAME + "Version.json"; public static final String STRUCTURAL_VARIANTS_DATA = "svs"; public static final String REPEATS_DATA = "repeats"; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java index 5e497675e7..0f0f967831 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java @@ -19,17 +19,18 @@ import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.DownloadProperties; import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.lib.EtlCommons; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.Collections; import java.util.List; +import static org.opencb.cellbase.lib.EtlCommons.*; + public class VariationDownloadManager extends AbstractDownloadManager { - private static final String DBSNP_NAME = "dbSNP"; public VariationDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) throws IOException, CellBaseException { super(species, assembly, targetDirectory, configuration); @@ -37,24 +38,27 @@ public VariationDownloadManager(String species, String assembly, Path targetDire @Override public List download() throws IOException, InterruptedException { - return Collections.singletonList(downloadCaddScores()); + return Collections.singletonList(downloadDbSnp()); } - public DownloadFile downloadCaddScores() throws IOException, InterruptedException { - if (!speciesHasInfoToDownload(speciesConfiguration, "variation")) { + public DownloadFile downloadDbSnp() throws IOException, InterruptedException { + if (!speciesHasInfoToDownload(speciesConfiguration, VARIATION_DATA)) { return null; } if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { logger.info("Downloading dbSNP scores information ..."); - Path variation = downloadFolder.resolve("variation"); + Path variation = downloadFolder.resolve(VARIATION_DATA); Files.createDirectories(variation); DownloadProperties.URLProperties dbSNP = configuration.getDownload().getDbSNP(); String url = dbSNP.getHost(); - saveVersionData(EtlCommons.VARIATION_DATA, DBSNP_NAME, dbSNP.getVersion(), getTimeStamp(), - Collections.singletonList(url), variation.resolve("dbSnpVersion.json")); - return downloadFile(url, variation.resolve(EtlCommons.DBSNP_FILE).toString()); + saveVersionData(VARIATION_DATA, DBSNP_NAME, dbSNP.getVersion(), getTimeStamp(), + Collections.singletonList(url), variation.resolve(DBSNP_VERSION_FILENAME)); + + Path outPath = variation.resolve(Paths.get(url).getFileName()); + logger.info("Downloading {} to {} ...", url, outPath); + return downloadFile(url, outPath.toString()); } return null; } From b75b1f757493443d0fbad66d49413ce823b1dfa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Mar 2024 12:31:46 +0100 Subject: [PATCH 06/26] lib: some improvements in downloading dbSNP data, #TASK-5816, #TASK-5789 --- .../admin/executors/BuildCommandExecutor.java | 18 +- .../cellbase/lib/builders/DbSnpBuilder.java | 174 ++++++++++++++++++ .../lib/builders/VariationBuilder.java | 131 ++----------- 3 files changed, 197 insertions(+), 126 deletions(-) create mode 100644 cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java index 35814cabe3..732be3d35d 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java @@ -38,7 +38,7 @@ import java.util.Collections; import java.util.List; -import static org.opencb.cellbase.lib.EtlCommons.PHARMGKB_DATA; +import static org.opencb.cellbase.lib.EtlCommons.*; /** * Created by imedina on 03/02/15. @@ -279,11 +279,17 @@ private CellBaseBuilder buildRefSeq() { } private CellBaseBuilder buildVariation() { - Path variationFunctionalScorePath = downloadFolder.resolve("variation"); - copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("dbSnpVersion.json"))); - Path variationFilePath = variationFunctionalScorePath.resolve(EtlCommons.DBSNP_FILE); - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "dbSNP"); - return new VariationBuilder(variationFilePath, serializer); + Path downloadVariationPath = downloadFolder.resolve(VARIATION_DATA); + Path buildVariationPath = buildFolder.resolve(VARIATION_DATA); + if (!buildVariationPath.toFile().exists()) { + buildVariationPath.toFile().mkdirs(); + } + + CellBaseFileSerializer variationSerializer = new CellBaseJsonFileSerializer(buildVariationPath); + + // Currently, only dbSNP data + copyVersionFiles(Collections.singletonList(downloadVariationPath.resolve(DBSNP_VERSION_FILENAME))); + return new VariationBuilder(downloadVariationPath, variationSerializer, configuration); } private CellBaseBuilder buildCadd() { diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java new file mode 100644 index 0000000000..f03d9540b9 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java @@ -0,0 +1,174 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.builders; + +import org.opencb.biodata.models.core.Snp; +import org.opencb.biodata.models.core.SnpAnnotation; +import org.opencb.biodata.models.variant.avro.PopulationFrequency; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.cellbase.core.serializer.CellBaseSerializer; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.opencb.cellbase.lib.EtlCommons.DBSNP_NAME; + +/** + * Created by imedina on 06/11/15. + */ +public class DbSnpBuilder extends CellBaseBuilder { + + private Path sourceVariationPath; + private DownloadProperties.URLProperties dbSnpUrlProperties; + + public DbSnpBuilder(Path sourceVariationPath, DownloadProperties.URLProperties dbSnpUrlProperties, CellBaseSerializer serializer) { + super(serializer); + this.sourceVariationPath = sourceVariationPath; + this.dbSnpUrlProperties = dbSnpUrlProperties; + + logger = LoggerFactory.getLogger(DbSnpBuilder.class); + } + + /* Example: + ## dbSNP 156 + #CHROM POS ID REF ALT QUAL FILTER INFO + NC_000001.11 926003 rs1329301928 C A,T . . RS=1329301928;dbSNPBuildID=151;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;NSM;R5;GNO; + FREQ=Estonian:0.9998,0.0002232,.|TOMMO:0.9999,.,0.0001062|dbGaP_PopFreq:0.9999,5.4e-05,0; + CLNVI=.,.,;CLNORIGIN=.,.,1;CLNSIG=.,.,0;CLNDISDB=.,.,MedGen:CN517202;CLNDN=.,.,not_provided;CLNREVSTAT=.,.,single; + CLNACC=.,.,RCV001929748.1;CLNHGVS=NC_000001.11:g.926003=,NC_000001.11:g.926003C>A,NC_000001.11:g.926003C>T + NC_000001.11 925952 rs1640863258 G A . . RS=1640863258;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;NSM;R5;CLNVI=.,;CLNORIGIN=.,1;CLNSIG=.,0;CLNDISDB=.,MedGen:CN517202; + CLNDN=.,not_provided;CLNREVSTAT=.,single;CLNACC=.,RCV001318826.4;CLNHGVS=NC_000001.11:g.925952=,NC_000001.11:g.925952G>A + NC_000001.11 925953 rs1349221494 G A,T . . RS=1349221494;dbSNPBuildID=151;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;SYN;R5;GNO; + FREQ=GnomAD:1,1.426e-05,.|GnomAD_exomes:1,.,4.008e-06|TOPMED:1,3.778e-06,.|dbGaP_PopFreq:1,0,3.124e-05 + NC_000001.11 925956 rs1342334044 C T . . RS=1342334044;dbSNPBuildID=155;SSR=0; + GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;SYN;R5;GNO; + FREQ=TOPMED:1,1.133e-05|dbGaP_PopFreq:1,0; + CLNVI=.,;CLNORIGIN=.,1;CLNSIG=.,3;CLNDISDB=.,MedGen:CN517202;CLNDN=.,not_provided;CLNREVSTAT=.,single;CLNACC=.,RCV002170030.3; + CLNHGVS=NC_000001.11:g.925956=,NC_000001.11:g.925956C>T + */ + @Override + public void parse() throws Exception { + Path dbSnpFilePath = sourceVariationPath.resolve(Paths.get(dbSnpUrlProperties.getHost()).getFileName()); + FileUtils.checkPath(dbSnpFilePath); + + CellBaseFileSerializer fileSerializer = (CellBaseFileSerializer) serializer; + + String line; + String[] fields; + + String currentChromosome = null; + String chromosome = null; + int position; + String id; + String ref; + String[] alt; + String info; + List flags; + + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(dbSnpFilePath)) { + while ((line = bufferedReader.readLine()) != null) { + if (!line.startsWith("#")) { + fields = line.split("\t"); + + // This only happens the first time, when we start reading the file + if (chromosome == null) { + logger.info("Parsing chr {} ", fields[0]); + currentChromosome = fields[0]; + chromosome = fields[0].split("\\.")[0]; + } + + position = Integer.parseInt(fields[1]); + id = fields[2]; + ref = fields[3]; + alt = fields[4].split(","); + info = fields[7]; + + String[] infoFields = info.split(";"); + flags = new ArrayList<>(); + SnpAnnotation snpAnnotation = new SnpAnnotation(); + for (String infoField : infoFields) { + String[] infoKeyValue = infoField.split("="); + switch (infoKeyValue[0]) { + case "GENEINFO": { + snpAnnotation.setGene(infoKeyValue[1].split(":")[0]); + break; + } + case "FREQ": { + String[] studies = infoKeyValue[1].split("\\|"); + List populationFrequencies = new ArrayList<>(); + for (String study : studies) { + String[] freqFields = study.split("[:,]"); + if (freqFields.length == alt.length + 2) { + for (int i = 0; i < alt.length; i++) { + if (".".equals(freqFields[i + 2])) { + logger.warn("Skipping pop. frequency for alt. allele ({}) of study {}: it is '.')", + alt[i], freqFields[0]); + } else { + PopulationFrequency populationFrequency = new PopulationFrequency(); + // Set study + populationFrequency.setStudy(freqFields[0]); + // Set reference + populationFrequency.setRefAllele(ref); + populationFrequency.setRefAlleleFreq(Float.parseFloat(freqFields[1])); + // Set alternate + populationFrequency.setAltAllele(alt[i]); + populationFrequency.setAltAlleleFreq(Float.parseFloat(freqFields[i + 2])); + + populationFrequencies.add(populationFrequency); + } + } + } else { + logger.warn("Skipping pop. frequencies for study {}: the number of prop. frequencies ({}) does not" + + " match the number of alleles ({})", freqFields[0], freqFields.length - 1, + alt.length + 1); + } + } + snpAnnotation.setPopulationFrequencies(populationFrequencies); + break; + } + default: { + if (infoKeyValue.length == 1) { + flags.add(infoKeyValue[0]); + } + } + } + } + snpAnnotation.setFlags(flags); + + if (!currentChromosome.equals(fields[0])) { + logger.info("Parsing chr {} ", fields[0]); + } + + Snp snp = new Snp(id, chromosome, position, ref, Arrays.asList(alt), "SNV", DBSNP_NAME, dbSnpUrlProperties.getVersion(), + snpAnnotation); + fileSerializer.serialize(snp, DBSNP_NAME); + } + } + } + logger.info("Parsing finished."); + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java index e00137505b..087a4aed36 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java @@ -16,142 +16,33 @@ package org.opencb.cellbase.lib.builders; -import org.opencb.biodata.models.core.Snp; -import org.opencb.biodata.models.core.SnpAnnotation; -import org.opencb.biodata.models.variant.avro.PopulationFrequency; -import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.commons.utils.FileUtils; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; /** * Created by imedina on 06/11/15. */ public class VariationBuilder extends CellBaseBuilder { - private Path dbSnpFilePath; + private DbSnpBuilder dbSnpBuilder; - private static final int CHUNK_SIZE = 1000; - private static final int DECIMAL_RESOLUTION = 100; + public VariationBuilder(Path downloadVariationPath, CellBaseFileSerializer fileSerializer, CellBaseConfiguration configuration) { + super(fileSerializer); - public VariationBuilder(Path dbSnpFilePath, CellBaseSerializer serializer) { - super(serializer); - this.dbSnpFilePath = dbSnpFilePath; + // dbSNP + DownloadProperties.URLProperties dbSnpUrlProperties = configuration.getDownload().getDbSNP(); + dbSnpBuilder = new DbSnpBuilder(downloadVariationPath, dbSnpUrlProperties, fileSerializer); logger = LoggerFactory.getLogger(VariationBuilder.class); } - /* Example: - ## dbSNP 156 - #CHROM POS ID REF ALT QUAL FILTER INFO - NC_000001.11 926003 rs1329301928 C A,T . . RS=1329301928;dbSNPBuildID=151;SSR=0; - GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;NSM;R5;GNO; - FREQ=Estonian:0.9998,0.0002232,.|TOMMO:0.9999,.,0.0001062|dbGaP_PopFreq:0.9999,5.4e-05,0; - CLNVI=.,.,;CLNORIGIN=.,.,1;CLNSIG=.,.,0;CLNDISDB=.,.,MedGen:CN517202;CLNDN=.,.,not_provided;CLNREVSTAT=.,.,single; - CLNACC=.,.,RCV001929748.1;CLNHGVS=NC_000001.11:g.926003=,NC_000001.11:g.926003C>A,NC_000001.11:g.926003C>T - NC_000001.11 925952 rs1640863258 G A . . RS=1640863258;SSR=0; - GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;NSM;R5;CLNVI=.,;CLNORIGIN=.,1;CLNSIG=.,0;CLNDISDB=.,MedGen:CN517202; - CLNDN=.,not_provided;CLNREVSTAT=.,single;CLNACC=.,RCV001318826.4;CLNHGVS=NC_000001.11:g.925952=,NC_000001.11:g.925952G>A - NC_000001.11 925953 rs1349221494 G A,T . . RS=1349221494;dbSNPBuildID=151;SSR=0; - GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;SYN;R5;GNO; - FREQ=GnomAD:1,1.426e-05,.|GnomAD_exomes:1,.,4.008e-06|TOPMED:1,3.778e-06,.|dbGaP_PopFreq:1,0,3.124e-05 - NC_000001.11 925956 rs1342334044 C T . . RS=1342334044;dbSNPBuildID=155;SSR=0; - GENEINFO=SAMD11:148398|LOC107985728:107985728;VC=SNV;SYN;R5;GNO; - FREQ=TOPMED:1,1.133e-05|dbGaP_PopFreq:1,0; - CLNVI=.,;CLNORIGIN=.,1;CLNSIG=.,3;CLNDISDB=.,MedGen:CN517202;CLNDN=.,not_provided;CLNREVSTAT=.,single;CLNACC=.,RCV002170030.3; - CLNHGVS=NC_000001.11:g.925956=,NC_000001.11:g.925956C>T - */ @Override public void parse() throws Exception { - FileUtils.checkPath(dbSnpFilePath); - - BufferedReader bufferedReader = FileUtils.newBufferedReader(dbSnpFilePath); - - String line; - String[] fields; - - String currentChromosome = null; - String chromosome = null; - int position; - String id; - String ref; - String[] alt; - String info; - Map infoMap; - List flags; - - while ((line = bufferedReader.readLine()) != null) { - if (!line.startsWith("#")) { - fields = line.split("\t"); - - // this only happens the first time, when we start reading the file - if (chromosome == null) { - logger.info("Parsing chr {} ", fields[0]); - currentChromosome = fields[0]; - chromosome = fields[0].split("\\.")[0]; - } - - position = Integer.parseInt(fields[1]); - id = fields[2]; - ref = fields[3]; - alt = fields[4].split(","); - info = fields[7]; - - String[] infoFields = info.split(";"); - flags = new ArrayList<>(); - SnpAnnotation snpAnnotation = new SnpAnnotation(); - for (String infoField : infoFields) { - String[] infoKeyValue = infoField.split("="); - switch (infoKeyValue[0]) { - case "GENEINFO": - snpAnnotation.setGene(infoKeyValue[1].split(":")[0]); - break; - case "FREQ": - String[] studies = infoKeyValue[1].split("\\|"); - List populationFrequencies = new ArrayList<>(); - for (String study : studies) { - String[] freqFields = study.split("[:,]"); - for (int i = 0; i < alt.length; i++) { - if (!freqFields[i + 2].equals(".")) { - PopulationFrequency populationFrequency = new PopulationFrequency(); - populationFrequency.setStudy(freqFields[0]); - System.out.println(freqFields[i + 1]); - populationFrequency.setRefAlleleFreq(Float.parseFloat(freqFields[i + 1])); - // freqFields[1] is the 'ref' allele ALT freq - populationFrequency.setAltAllele(alt[i]); - populationFrequency.setAltAlleleFreq(Float.parseFloat(freqFields[i + 2])); - populationFrequencies.add(populationFrequency); - } - } - } - snpAnnotation.setPopulationFrequencies(populationFrequencies); - break; - default: - if (infoKeyValue.length == 1) { - flags.add(infoKeyValue[0]); - } - } - } - snpAnnotation.setFlags(flags); - - if (!currentChromosome.equals(fields[0])) { - logger.info("Parsing chr {} ", fields[0]); - - } - - Snp snp = new Snp(id, chromosome, position, ref, Arrays.asList(alt), "SNV", "dbSNP", "156", snpAnnotation); - serializer.serialize(snp); - } - } - - serializer.close(); - bufferedReader.close(); - logger.info("Parsing finished."); + // Parsing dbSNP data + dbSnpBuilder.parse(); } } From 4e68dab39fa0f7e663b69639c8b891bf070f1892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Mar 2024 13:29:57 +0100 Subject: [PATCH 07/26] lib: load dbSNP data in the CellBase MongoDB collection 'snp', #TASK-5817, #TASK-5789 --- .../admin/executors/BuildCommandExecutor.java | 5 +- .../admin/executors/LoadCommandExecutor.java | 55 ++++++++++++++----- .../org/opencb/cellbase/lib/EtlCommons.java | 1 + .../src/main/resources/mongodb-indexes.json | 3 + 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java index 732be3d35d..16db1f82bc 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java @@ -278,7 +278,7 @@ private CellBaseBuilder buildRefSeq() { return new RefSeqGeneBuilder(refseqFolderPath, speciesConfiguration, serializer); } - private CellBaseBuilder buildVariation() { + private CellBaseBuilder buildVariation() throws IOException { Path downloadVariationPath = downloadFolder.resolve(VARIATION_DATA); Path buildVariationPath = buildFolder.resolve(VARIATION_DATA); if (!buildVariationPath.toFile().exists()) { @@ -288,7 +288,8 @@ private CellBaseBuilder buildVariation() { CellBaseFileSerializer variationSerializer = new CellBaseJsonFileSerializer(buildVariationPath); // Currently, only dbSNP data - copyVersionFiles(Collections.singletonList(downloadVariationPath.resolve(DBSNP_VERSION_FILENAME))); + Files.copy(downloadVariationPath.resolve(DBSNP_VERSION_FILENAME), buildVariationPath.resolve(DBSNP_VERSION_FILENAME), + StandardCopyOption.REPLACE_EXISTING); return new VariationBuilder(downloadVariationPath, variationSerializer, configuration); } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java index 5a8fd9417b..97460d5a71 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java @@ -44,6 +44,8 @@ import java.util.List; import java.util.concurrent.ExecutionException; +import static org.opencb.cellbase.lib.EtlCommons.*; + /** * Created by imedina on 03/02/15. */ @@ -372,30 +374,57 @@ private void checkParameters() throws CellBaseException { private void loadVariationData() throws NoSuchMethodException, InterruptedException, ExecutionException, InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException, IOException, LoaderException, CellBaseException { + Path variationPath = input.resolve(VARIATION_DATA); // First load data - // Common loading process from CellBase variation data models if (field == null) { - DirectoryStream stream = Files.newDirectoryStream(input, + // Common loading process from CellBase variation data models + DirectoryStream stream = Files.newDirectoryStream(variationPath, entry -> entry.getFileName().toString().startsWith("variation_chr")); + int numLoadings = 0; for (Path entry : stream) { logger.info("Loading file '{}'", entry); - loadRunner.load(input.resolve(entry.getFileName()), "variation", dataRelease); + loadRunner.load(variationPath.resolve(entry.getFileName()), "variation", dataRelease); + numLoadings++; } - // Create index - createIndex("variation"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - input.resolve("ensemblVariationVersion.json") - )); - dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources); + if (numLoadings > 0) { + // Create index + createIndex("variation"); + // Update release (collection and sources) + List sources = new ArrayList<>(Arrays.asList( + variationPath.resolve("ensemblVariationVersion.json") + )); + dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources); + } else { + logger.info("Any variation file 'variation_chr...' found within folder '{}'", variationPath); + } + } else { // Custom update required e.g. population freqs loading + logger.info("Loading file '{}'", variationPath); + loadRunner.load(variationPath, "variation", dataRelease, field, innerFields); + } + + // Load dbSNP + Path dbSnpFilePath = variationPath.resolve(DBSNP_NAME + ".json.gz"); + if (dbSnpFilePath.toFile().exists()) { + if (variationPath.resolve(DBSNP_VERSION_FILENAME).toFile().exists()) { + logger.info("Loading dbSNP file '{}'", dbSnpFilePath); + loadRunner.load(dbSnpFilePath, SNP_COLLECTION_NAME, dataRelease); + + // Create index + createIndex(SNP_COLLECTION_NAME); + + // Update release (collection and sources) + List sources = Collections.singletonList(variationPath.resolve(DBSNP_VERSION_FILENAME)); + dataReleaseManager.update(dataRelease, SNP_COLLECTION_NAME, EtlCommons.VARIATION_DATA, sources); + } else { + logger.warn("In order to load the dbSNP file you need the version file {} within the folder '{}'", DBSNP_VERSION_FILENAME, + variationPath); + } } else { - logger.info("Loading file '{}'", input); - loadRunner.load(input, "variation", dataRelease, field, innerFields); + logger.warn("Any dbSNP file found within the folder '{}'", variationPath); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 26c595598f..8d5b4c55c4 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -67,6 +67,7 @@ public class EtlCommons { public static final String DBSNP_FILE = "GCF_000001405.40.gz"; public static final String DBSNP_NAME = "dbSNP"; public static final String DBSNP_VERSION_FILENAME = DBSNP_NAME + "Version.json"; + public static final String SNP_COLLECTION_NAME = "snp"; public static final String STRUCTURAL_VARIANTS_DATA = "svs"; public static final String REPEATS_DATA = "repeats"; diff --git a/cellbase-lib/src/main/resources/mongodb-indexes.json b/cellbase-lib/src/main/resources/mongodb-indexes.json index de81c7b83b..93b86af2df 100644 --- a/cellbase-lib/src/main/resources/mongodb-indexes.json +++ b/cellbase-lib/src/main/resources/mongodb-indexes.json @@ -145,3 +145,6 @@ {"collection": "pharmacogenomics", "fields": {"variants.phenotypeType": 1}, "options": {"background": true}} {"collection": "pharmacogenomics", "fields": {"variants.confidence": 1}, "options": {"background": true}} {"collection": "pharmacogenomics", "fields": {"variants.evidences.pubmed": 1}, "options": {"background": true}} + +{"collection": "snp", "fields": {"id": 1}, "options": {"background": true}} +{"collection": "snp", "fields": {"chromosome": 1, "position": 1, "reference": 1}, "options": {"background": true}} \ No newline at end of file From 2d6ca9606d93347d578782d2a922ff9f8c8de137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Mar 2024 17:59:21 +0100 Subject: [PATCH 08/26] lib: implement SnpMongoDBAdaptor, #TASK-5794, TASK-5789 --- .../opencb/cellbase/core/api/SnpQuery.java | 96 ++++++++++++ .../lib/impl/core/MongoDBAdaptorFactory.java | 4 + .../lib/impl/core/SnpMongoDBAdaptor.java | 143 ++++++++++++++++++ 3 files changed, 243 insertions(+) create mode 100644 cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java create mode 100644 cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java new file mode 100644 index 0000000000..94fb961bff --- /dev/null +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java @@ -0,0 +1,96 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.core.api; + +import org.opencb.cellbase.core.api.query.AbstractQuery; +import org.opencb.cellbase.core.api.query.QueryException; +import org.opencb.cellbase.core.api.query.QueryParameter; + +import java.util.List; +import java.util.Map; + +public class SnpQuery extends AbstractQuery { + + @QueryParameter(id = "id") + private List ids; + @QueryParameter(id = "chromosome") + private String chromosome; + @QueryParameter(id = "position") + private String position; + @QueryParameter(id = "reference") + private String reference; + + public SnpQuery() { + } + + public SnpQuery(Map params) throws QueryException { + super(params); + } + + @Override + protected void validateQuery() { + // nothing to validate + return; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("SnpQuery{"); + sb.append("ids=").append(ids); + sb.append(", chromosome='").append(chromosome).append('\''); + sb.append(", position='").append(position).append('\''); + sb.append(", reference='").append(reference).append('\''); + sb.append('}'); + return sb.toString(); + } + + public List getIds() { + return ids; + } + + public SnpQuery setIds(List ids) { + this.ids = ids; + return this; + } + + public String getChromosome() { + return chromosome; + } + + public SnpQuery setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public String getPosition() { + return position; + } + + public SnpQuery setPosition(String position) { + this.position = position; + return this; + } + + public String getReference() { + return reference; + } + + public SnpQuery setReference(String reference) { + this.reference = reference; + return this; + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java index e120e0ae51..8912840bd5 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java @@ -95,6 +95,10 @@ public PharmacogenomicsMongoDBAdaptor getPharmacogenomicsMongoDBAdaptor() { return new PharmacogenomicsMongoDBAdaptor(mongoDatastore); } + public SnpMongoDBAdaptor getSnpDBAdaptor() { + return new SnpMongoDBAdaptor(mongoDatastore); + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("MongoDBAdaptorFactory{"); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java new file mode 100644 index 0000000000..19f6f92aa0 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java @@ -0,0 +1,143 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.impl.core; + +import com.mongodb.client.model.Filters; +import org.bson.Document; +import org.bson.conversions.Bson; +import org.opencb.biodata.models.core.Snp; +import org.opencb.cellbase.core.api.SnpQuery; +import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.iterator.CellBaseIterator; +import org.opencb.cellbase.lib.iterator.CellBaseMongoDBIterator; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.datastore.core.QueryParam; +import org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter; +import org.opencb.commons.datastore.mongodb.MongoDBCollection; +import org.opencb.commons.datastore.mongodb.MongoDBIterator; +import org.opencb.commons.datastore.mongodb.MongoDataStore; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.opencb.cellbase.core.ParamConstants.API_KEY_PARAM; +import static org.opencb.cellbase.core.ParamConstants.DATA_RELEASE_PARAM; + +public class SnpMongoDBAdaptor extends CellBaseDBAdaptor implements CellBaseCoreDBAdaptor { + + private static final GenericDocumentComplexConverter CONVERTER; + + static { + CONVERTER = new GenericDocumentComplexConverter<>(Snp.class); + } + + public SnpMongoDBAdaptor(MongoDataStore mongoDataStore) { + super(mongoDataStore); + + this.init(); + } + + private void init() { + logger.debug("SnpMongoDBAdaptor: in 'constructor'"); + + mongoDBCollectionByRelease = buildCollectionByReleaseMap("snp"); + } + + @Override + public CellBaseIterator iterator(SnpQuery query) throws CellBaseException { + Bson bson = parseQuery(query); + Bson projection = getProjection(query); + QueryOptions queryOptions = query.toQueryOptions(); + + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, query.getDataRelease()); + MongoDBIterator iterator = mongoDBCollection.iterator(null, bson, projection, CONVERTER, queryOptions); + return new CellBaseMongoDBIterator<>(iterator); + } + + @Override + public List> info(List ids, ProjectionQueryOptions queryOptions, int dataRelease, + String apiKey) throws CellBaseException { + List> results = new ArrayList<>(); + Bson projection = getProjection(queryOptions); + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); + for (String id : ids) { + List orBsonList = new ArrayList<>(); + orBsonList.add(Filters.eq("id", id)); + Bson bson = Filters.or(orBsonList); + results.add(new CellBaseDataResult<>(mongoDBCollection.find(bson, projection, CONVERTER, new QueryOptions()))); + } + return results; + } + + @Override + public CellBaseDataResult count(SnpQuery query) { + return null; + } + + @Override + public CellBaseDataResult distinct(SnpQuery query) throws CellBaseException { + return null; + } + + @Override + public CellBaseDataResult aggregationStats(SnpQuery query) { + return null; + } + + @Override + public CellBaseDataResult groupBy(SnpQuery query) throws CellBaseException { + return null; + } + + public Bson parseQuery(SnpQuery query) { + List andBsonList = new ArrayList<>(); + try { + for (Map.Entry entry : query.toObjectMap().entrySet()) { + String dotNotationName = entry.getKey(); + Object value = entry.getValue(); + switch (dotNotationName) { + case "position": { + createAndOrQuery(value, dotNotationName, QueryParam.Type.INTEGER, andBsonList); + break; + } + case DATA_RELEASE_PARAM: + case API_KEY_PARAM: { + // Do nothing + break; + } + default: { + createAndOrQuery(value, dotNotationName, QueryParam.Type.STRING, andBsonList); + break; + } + } + } + } catch (IllegalAccessException e) { + e.printStackTrace(); + } + + logger.info("SNP parsed query: " + andBsonList); + if (andBsonList.size() > 0) { + System.out.println("SnpMongoDBAdaptor, parse query = " + andBsonList); + return Filters.and(andBsonList); + } else { + return new Document(); + } + } +} From 56b3eb61e96416f9296764ca9a81a8e274335916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Mar 2024 18:00:48 +0100 Subject: [PATCH 09/26] server: add endpoint 'snp' and update endpotins variant/search variant/info to look for dbSNP ID, #TASK-5820, #TASK-5789 --- .../lib/impl/core/VariantMongoDBAdaptor.java | 77 ++++++++++++++++--- .../cellbase/lib/managers/VariantManager.java | 15 ++-- .../server/rest/genomic/VariantWSServer.java | 35 +++++++++ 3 files changed, 112 insertions(+), 15 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java index fc4b602cd9..c32e84f43f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java @@ -18,11 +18,13 @@ import com.mongodb.bulk.BulkWriteResult; import com.mongodb.client.model.Filters; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.bson.Document; import org.bson.conversions.Bson; import org.opencb.biodata.models.core.GenomicScoreRegion; import org.opencb.biodata.models.core.Region; +import org.opencb.biodata.models.core.Snp; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.Score; import org.opencb.biodata.models.variant.avro.StructuralVariantType; @@ -40,6 +42,7 @@ import org.opencb.cellbase.lib.iterator.CellBaseIterator; import org.opencb.cellbase.lib.iterator.CellBaseMongoDBIterator; import org.opencb.cellbase.lib.iterator.VariantMongoDBIterator; +import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryParam; @@ -50,6 +53,7 @@ import java.util.*; import java.util.function.Consumer; +import static org.opencb.cellbase.core.ParamConstants.API_KEY_PARAM; import static org.opencb.cellbase.core.ParamConstants.DATA_RELEASE_PARAM; import static org.opencb.cellbase.lib.MongoDBCollectionConfiguration.VARIATION_FUNCTIONAL_SCORE_CHUNK_SIZE; @@ -68,6 +72,7 @@ public class VariantMongoDBAdaptor extends CellBaseDBAdaptor implements CellBase private Map caddDBCollectionByRelease; + private Map snpDBCollectionByRelease; public VariantMongoDBAdaptor(MongoDataStore mongoDataStore) { super(mongoDataStore); @@ -80,6 +85,7 @@ private void init() { mongoDBCollectionByRelease = buildCollectionByReleaseMap("variation"); caddDBCollectionByRelease = buildCollectionByReleaseMap("variation_functional_score"); + snpDBCollectionByRelease = buildCollectionByReleaseMap("snp"); } public CellBaseDataResult next(Query query, QueryOptions options) { @@ -207,7 +213,7 @@ public CellBaseDataResult groupBy(Query query, List fields, QueryOptions } @Deprecated - private Bson parseQuery(Query query) { + private Bson parseQuery(Query query) throws CellBaseException { List andBsonList = new ArrayList<>(); createOrQuery(query, ParamConstants.QueryParams.CHROMOSOME.key(), "chromosome", andBsonList); @@ -221,7 +227,12 @@ private Bson parseQuery(Query query) { } createRegionQuery(query, ParamConstants.QueryParams.REGION.key(), MongoDBCollectionConfiguration.VARIATION_CHUNK_SIZE, andBsonList); - createOrQuery(query, ParamConstants.QueryParams.ID.key(), "id", andBsonList); + + if (StringUtils.isNotEmpty(query.getString(ParamConstants.QueryParams.ID.key()))) { + List variantIds = getVariantIds(query.getAsStringList(ParamConstants.QueryParams.ID.key()), + query.getInt(DATA_RELEASE_PARAM)); + createOrQuery(variantIds, "id", andBsonList); + } createImprecisePositionQuery(query, ParamConstants.QueryParams.CI_START_LEFT.key(), ParamConstants.QueryParams.CI_START_RIGHT.key(), @@ -243,16 +254,21 @@ private Bson parseQuery(Query query) { } } - public Bson parseQuery(VariantQuery query) { + public Bson parseQuery(VariantQuery query) throws CellBaseException { List andBsonList = new ArrayList<>(); try { for (Map.Entry entry : query.toObjectMap().entrySet()) { String dotNotationName = entry.getKey(); Object value = entry.getValue(); switch (dotNotationName) { + case "id": + List variantIds = getVariantIds(Arrays.asList(query.getId().split(",")), query.getDataRelease()); + createAndOrQuery(variantIds, dotNotationName, QueryParam.Type.STRING, andBsonList); + break; case "region": createRegionQuery(query, query.getRegions(), MongoDBCollectionConfiguration.VARIATION_CHUNK_SIZE, andBsonList); break; + case API_KEY_PARAM: case DATA_RELEASE_PARAM: case "svType": // don't do anything, this is parsed later @@ -283,7 +299,7 @@ public Bson parseQuery(VariantQuery query) { } } } catch (IllegalAccessException e) { - e.printStackTrace(); + throw new CellBaseException(e.getMessage()); } logger.debug("variant parsed query: " + andBsonList.toString()); @@ -745,13 +761,14 @@ public List> info(List ids, ProjectionQueryO throws CellBaseException { List> results = new ArrayList<>(); MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); - for (String id : ids) { - Bson projection = getProjection(queryOptions); - List orBsonList = new ArrayList<>(ids.size()); - orBsonList.add(Filters.eq("id", id)); - Bson bson = Filters.or(orBsonList); - results.add(new CellBaseDataResult<>(mongoDBCollection.find(bson, projection, Variant.class, new QueryOptions()))); + Bson projection = getProjection(queryOptions); + List variantIds = getVariantIds(ids, dataRelease); + List orBsonList = new ArrayList<>(variantIds.size()); + for (String variantId : variantIds) { + orBsonList.add(Filters.eq("id", variantId)); } + Bson bson = Filters.or(orBsonList); + results.add(new CellBaseDataResult<>(mongoDBCollection.find(bson, projection, Variant.class, new QueryOptions()))); return results; } @@ -776,6 +793,46 @@ public CellBaseDataResult getFunctionalScoreRegion(List(mongoDBCollection.find(bson, projection, GenomicScoreRegion.class, new QueryOptions())); } + + private List getVariantIds(List ids, int dataRelease) throws CellBaseException { + List variantIds = new ArrayList<>(); + List snpIds = new ArrayList<>(); + for (String id : ids) { + if (id.startsWith("rs")) { + snpIds.add(id); + } else { + variantIds.add(id); + } + } + if (CollectionUtils.isNotEmpty(snpIds)) { + List orBsonList = new ArrayList<>(); + for (String snpId : snpIds) { + orBsonList.add(Filters.eq("id", snpId)); + } + Bson query = Filters.or(orBsonList); + + MongoDBCollection mongoDBCollection = getCollectionByRelease(snpDBCollectionByRelease, dataRelease); + DataResult snpDataResult = mongoDBCollection.find(query, null, Snp.class, new QueryOptions()); + + Set results = new HashSet<>(); + + // Build the variant IDs + if (snpDataResult.getNumResults() > 0) { + for (Snp snp : snpDataResult.getResults()) { + for (String allele : snp.getAlleles()) { + results.add(snp.getChromosome() + ":" + snp.getPosition() + ":" + snp.getReference() + ":" + allele); + } + } + } + + // Add new variant IDs, if necessary + if (CollectionUtils.isNotEmpty(results)) { + variantIds.addAll(results); + } + } + + return variantIds; + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java index 28f5c70fa7..2b90d40ebd 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java @@ -16,10 +16,7 @@ package org.opencb.cellbase.lib.managers; -import org.opencb.biodata.models.core.Gene; -import org.opencb.biodata.models.core.GenomicScoreRegion; -import org.opencb.biodata.models.core.Region; -import org.opencb.biodata.models.core.SpliceScore; +import org.opencb.biodata.models.core.*; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.avro.SampleEntry; @@ -27,7 +24,9 @@ import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.biodata.models.variant.avro.VariantType; import org.opencb.cellbase.core.ParamConstants; +import org.opencb.cellbase.core.api.SnpQuery; import org.opencb.cellbase.core.api.VariantQuery; +import org.opencb.cellbase.core.api.key.ApiKeyLicensedDataUtils; import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.api.query.QueryException; import org.opencb.cellbase.core.config.CellBaseConfiguration; @@ -36,9 +35,9 @@ import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.core.variant.AnnotationBasedPhasedQueryManager; import org.opencb.cellbase.lib.impl.core.CellBaseCoreDBAdaptor; +import org.opencb.cellbase.lib.impl.core.SnpMongoDBAdaptor; import org.opencb.cellbase.lib.impl.core.SpliceScoreMongoDBAdaptor; import org.opencb.cellbase.lib.impl.core.VariantMongoDBAdaptor; -import org.opencb.cellbase.core.api.key.ApiKeyLicensedDataUtils; import org.opencb.cellbase.lib.variant.VariantAnnotationUtils; import org.opencb.cellbase.lib.variant.annotation.CellBaseNormalizerSequenceAdaptor; import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator; @@ -59,6 +58,7 @@ public class VariantManager extends AbstractManager implements AggregationApi getFunctionalScoreRegion(List(chunkIdSet), options, dataRelease); } + + public CellBaseDataResult getSnps(SnpQuery query) throws CellBaseException { + return snpDBAdaptor.query(query); + } } diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java index eba949398d..8d6983293a 100755 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java @@ -18,8 +18,10 @@ import io.swagger.annotations.*; import org.apache.commons.lang.StringUtils; +import org.opencb.biodata.models.core.Snp; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.cellbase.core.api.SnpQuery; import org.opencb.cellbase.core.api.VariantQuery; import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResult; @@ -493,6 +495,39 @@ public Response getAllConsequenceTypes() { } } + @GET + @Path("/snp") + @ApiOperation(httpMethod = "GET", value = "Get SNPs", + response = Snp.class, responseContainer = "QueryResponse") + @ApiImplicitParams({ + @ApiImplicitParam(name = "exclude", value = EXCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "include", value = INCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "sort", value = SORT_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "order", value = ORDER_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query", + defaultValue = "", allowableValues="ASCENDING,DESCENDING"), + @ApiImplicitParam(name = "limit", value = LIMIT_DESCRIPTION, + required = false, defaultValue = DEFAULT_LIMIT, dataType = "java.util.List", + paramType = "query"), + @ApiImplicitParam(name = "skip", value = SKIP_DESCRIPTION, + required = false, defaultValue = DEFAULT_SKIP, dataType = "java.util.List", + paramType = "query") + }) + public Response getSnps(@QueryParam("id") @ApiParam(name = "id", value = "ID") String id, + @QueryParam("chromosome") @ApiParam(name = "chromosome", value = "Chromsome") String chromosome, + @QueryParam("position") @ApiParam(name = "position", value = "Position") Integer position) { + try { + SnpQuery query = new SnpQuery(uriParams); + CellBaseDataResult queryResult = variantManager.getSnps(query); + return createOkResponse(queryResult); + } catch (Exception e) { + return createErrorResponse(e); + } + } + // FIXME: 29/04/16 GET and POST web services to be fixed // @GET // @Path("/{variants}/consequenceType") From 82e57f37b3b0a777c7b558b89c3dbfa17945f7b3 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 12 Mar 2024 00:57:59 +0000 Subject: [PATCH 10/26] Several improvements --- .../download/VariationDownloadManager.java | 2 +- .../lib/impl/core/OntologyMongoDBAdaptor.java | 5 - .../impl/core/PublicationMongoDBAdaptor.java | 5 - .../lib/impl/core/SnpMongoDBAdaptor.java | 43 +++++-- .../lib/impl/core/VariantMongoDBAdaptor.java | 33 +++-- .../server/rest/genomic/VariantWSServer.java | 113 ++---------------- 6 files changed, 56 insertions(+), 145 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java index 0f0f967831..7f317d5f7b 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java @@ -46,7 +46,7 @@ public DownloadFile downloadDbSnp() throws IOException, InterruptedException { return null; } if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - logger.info("Downloading dbSNP scores information ..."); + logger.info("Downloading dbSNP information ..."); Path variation = downloadFolder.resolve(VARIATION_DATA); Files.createDirectories(variation); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/OntologyMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/OntologyMongoDBAdaptor.java index cacf8457d5..f1e664a508 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/OntologyMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/OntologyMongoDBAdaptor.java @@ -85,11 +85,6 @@ public List> info(List ids, ProjectionQ return results; } - @Override - public CellBaseDataResult count(OntologyQuery query) { - return null; - } - @Override public CellBaseDataResult distinct(OntologyQuery query) throws CellBaseException { Bson bsonDocument = parseQuery(query); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/PublicationMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/PublicationMongoDBAdaptor.java index a279f07653..5c8fcb571f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/PublicationMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/PublicationMongoDBAdaptor.java @@ -86,11 +86,6 @@ public List> info(List ids, Projection return results; } - @Override - public CellBaseDataResult count(PublicationQuery query) { - return null; - } - @Override public CellBaseDataResult distinct(PublicationQuery query) throws CellBaseException { Bson bsonDocument = parseQuery(query); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java index 19f6f92aa0..5a7eb2cb88 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java @@ -17,6 +17,7 @@ package org.opencb.cellbase.lib.impl.core; import com.mongodb.client.model.Filters; +import com.mongodb.client.model.Projections; import org.bson.Document; import org.bson.conversions.Bson; import org.opencb.biodata.models.core.Snp; @@ -36,6 +37,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.regex.Pattern; import static org.opencb.cellbase.core.ParamConstants.API_KEY_PARAM; import static org.opencb.cellbase.core.ParamConstants.DATA_RELEASE_PARAM; @@ -72,8 +74,8 @@ public CellBaseIterator iterator(SnpQuery query) throws CellBaseException { } @Override - public List> info(List ids, ProjectionQueryOptions queryOptions, int dataRelease, - String apiKey) throws CellBaseException { + public List> info(List ids, ProjectionQueryOptions queryOptions, int dataRelease, String apiKey) + throws CellBaseException { List> results = new ArrayList<>(); Bson projection = getProjection(queryOptions); MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); @@ -86,14 +88,12 @@ public List> info(List ids, ProjectionQueryOptio return results; } - @Override - public CellBaseDataResult count(SnpQuery query) { - return null; - } - @Override public CellBaseDataResult distinct(SnpQuery query) throws CellBaseException { - return null; + Bson bsonQuery = parseQuery(query); + logger.info("snpQuery distinct: {}", bsonQuery.toBsonDocument().toJson()); + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, query.getDataRelease()); + return new CellBaseDataResult<>(mongoDBCollection.distinct(query.getFacet(), bsonQuery, String.class)); } @Override @@ -103,7 +103,27 @@ public CellBaseDataResult aggregationStats(SnpQuery query) { @Override public CellBaseDataResult groupBy(SnpQuery query) throws CellBaseException { - return null; + Bson bsonQuery = parseQuery(query); + logger.info("snpQuery groupBy: {}", bsonQuery.toBsonDocument().toJson()); + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, query.getDataRelease()); + return groupBy(bsonQuery, query, "name", mongoDBCollection); + } + + public CellBaseDataResult startsWith(String id, QueryOptions options, int dataRelease) throws CellBaseException { + Bson regex = Filters.regex("id", Pattern.compile("^" + id)); + Bson projection; + if (options.containsKey(QueryOptions.INCLUDE)) { + projection = Projections.include(options.getAsStringList(QueryOptions.INCLUDE)); + } else { + if (options.containsKey(QueryOptions.EXCLUDE)) { + projection = Projections.exclude(options.getAsStringList(QueryOptions.EXCLUDE)); + } else { + projection = Projections.exclude("annotation"); + } + } + + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); + return new CellBaseDataResult<>(mongoDBCollection.find(regex, projection, CONVERTER, options)); } public Bson parseQuery(SnpQuery query) { @@ -132,9 +152,8 @@ public Bson parseQuery(SnpQuery query) { e.printStackTrace(); } - logger.info("SNP parsed query: " + andBsonList); - if (andBsonList.size() > 0) { - System.out.println("SnpMongoDBAdaptor, parse query = " + andBsonList); + logger.info("SnpMongoDBAdaptor parsed query: {}", andBsonList); + if (!andBsonList.isEmpty()) { return Filters.and(andBsonList); } else { return new Document(); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java index c32e84f43f..2b1a04c694 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java @@ -18,6 +18,7 @@ import com.mongodb.bulk.BulkWriteResult; import com.mongodb.client.model.Filters; +import com.mongodb.client.model.Projections; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.bson.Document; @@ -247,7 +248,7 @@ private Bson parseQuery(Query query) throws CellBaseException { "annotation.consequenceTypes.sequenceOntologyTerms.name", andBsonList); createGeneOrQuery(query, ParamConstants.QueryParams.GENE.key(), andBsonList); - if (andBsonList.size() > 0) { + if (!andBsonList.isEmpty()) { return Filters.and(andBsonList); } else { return new Document(); @@ -262,6 +263,7 @@ public Bson parseQuery(VariantQuery query) throws CellBaseException { Object value = entry.getValue(); switch (dotNotationName) { case "id": + // Both variant IDs and dbSNP IDs are allowed List variantIds = getVariantIds(Arrays.asList(query.getId().split(",")), query.getDataRelease()); createAndOrQuery(variantIds, dotNotationName, QueryParam.Type.STRING, andBsonList); break; @@ -302,8 +304,8 @@ public Bson parseQuery(VariantQuery query) throws CellBaseException { throw new CellBaseException(e.getMessage()); } - logger.debug("variant parsed query: " + andBsonList.toString()); - if (andBsonList.size() > 0) { + logger.debug("variant parsed query: {}", andBsonList); + if (!andBsonList.isEmpty()) { return Filters.and(andBsonList); } else { return new Document(); @@ -585,21 +587,11 @@ public CellBaseDataResult getFunctionalScoreVariant(Variant variant, Quer if (position >= chunkStart && position <= chunkEnd) { int offset = (position - chunkStart); ArrayList basicDBList = dbObject.get("values", ArrayList.class); - -// long l1 = 0L; // TODO: delete -// try { // TODO: delete long l1 = Long.parseLong(basicDBList.get(offset).toString()); -// l1 = (Long) basicDBList.get(offset); -// } catch (Exception e) { // TODO: delete -// logger.error("problematic variant: {}", variant.toString()); -// throw e; -// } - if (dbObject.getString("source").equalsIgnoreCase("cadd_raw")) { float value = 0f; switch (alternate.toLowerCase()) { case "a": -// value = ((short) (l1 >> 48) - 10000) / DECIMAL_RESOLUTION; value = (((short) (l1 >> 48)) / DECIMAL_RESOLUTION) - 10; break; case "c": @@ -618,7 +610,6 @@ public CellBaseDataResult getFunctionalScoreVariant(Variant variant, Quer .setScore(value) .setSource(dbObject.getString("source")) .setDescription(null) - // .setDescription("") .build()); } @@ -795,8 +786,9 @@ public CellBaseDataResult getFunctionalScoreRegion(List getVariantIds(List ids, int dataRelease) throws CellBaseException { - List variantIds = new ArrayList<>(); + List variantIds = new ArrayList<>(ids.size()); List snpIds = new ArrayList<>(); + // Split dbSNP IDs and variant IDs for (String id : ids) { if (id.startsWith("rs")) { snpIds.add(id); @@ -804,19 +796,22 @@ private List getVariantIds(List ids, int dataRelease) throws Cel variantIds.add(id); } } + + // Get the variant ID for the dbSNP ID if (CollectionUtils.isNotEmpty(snpIds)) { + // 1. Prepare the query List orBsonList = new ArrayList<>(); for (String snpId : snpIds) { orBsonList.add(Filters.eq("id", snpId)); } Bson query = Filters.or(orBsonList); + // 2. We must exclude as much information as possible to improve performance MongoDBCollection mongoDBCollection = getCollectionByRelease(snpDBCollectionByRelease, dataRelease); - DataResult snpDataResult = mongoDBCollection.find(query, null, Snp.class, new QueryOptions()); + DataResult snpDataResult = mongoDBCollection.find(query, Projections.exclude("annotation"), Snp.class, new QueryOptions()); + // 3. Build the variant IDs Set results = new HashSet<>(); - - // Build the variant IDs if (snpDataResult.getNumResults() > 0) { for (Snp snp : snpDataResult.getResults()) { for (String allele : snp.getAlleles()) { @@ -825,7 +820,7 @@ private List getVariantIds(List ids, int dataRelease) throws Cel } } - // Add new variant IDs, if necessary + // 4. Add new variant IDs, if necessary if (CollectionUtils.isNotEmpty(results)) { variantIds.addAll(results); } diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java index 8d6983293a..3bb57515a5 100755 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java @@ -52,9 +52,9 @@ public VariantWSServer(@PathParam("apiVersion") @ApiParam(name = "apiVersion", v defaultValue = DEFAULT_VERSION) String apiVersion, @PathParam("species") @ApiParam(name = "species", value = SPECIES_DESCRIPTION) String species, @ApiParam(name = "assembly", value = ASSEMBLY_DESCRIPTION) @DefaultValue("") @QueryParam("assembly") - String assembly, + String assembly, @ApiParam(name = "dataRelease", value = DATA_RELEASE_DESCRIPTION) @DefaultValue("0") @QueryParam("dataRelease") - int dataRelease, + int dataRelease, @ApiParam(name = "apiKey", value = API_KEY_DESCRIPTION) @DefaultValue("") @QueryParam("apiKey") String apiKey, @Context UriInfo uriInfo, @Context HttpServletRequest hsr) throws CellBaseServerException { @@ -119,20 +119,6 @@ public Response getNormalization(@PathParam("variants") @ApiParam(name = "varian } - // @GET -// @Path("/{phenotype}/phenotype") -// @ApiOperation(httpMethod = "GET", -// value = "Not implemented yet", -// response = CellBaseDataResponse.class, hidden = true) -// public Response getVariantsByPhenotype(@PathParam("phenotype") String phenotype) { -// try { -// parseQueryParams(); -// return Response.ok("Not implemented").build(); -// } catch (Exception e) { -// return createErrorResponse(e); -// } -// } - @POST @Consumes("text/plain") @Path("/annotation") @@ -291,7 +277,7 @@ public Response getAnnotationByVariantsGET(@PathParam("variants") @QueryParam("checkAminoAcidChange") @ApiParam(name = "checkAminoAcidChange", value = "", allowableValues = "false,true", defaultValue = "false", required = false) - Boolean checkAminoAcidChange, + Boolean checkAminoAcidChange, @QueryParam("consequenceTypeSource") @ApiParam(name = "consequenceTypeSource", value = "Gene set, either ensembl (default) " + "or refseq", allowableValues = "ensembl,refseq", allowMultiple = true, @@ -360,29 +346,6 @@ private Response getAnnotationByVariant(String variants, } } -// @GET -// @Deprecated -// @Path("/{variants}/cadd") -// @ApiOperation(httpMethod = "GET", value = "Get CADD scores for a (list of) variant(s)", response = Score.class, -// responseContainer = "QueryResponse", hidden = true) -// public Response getCaddScoreByVariant(@PathParam("variants") -// @ApiParam(name = "variants", value = "Comma separated list of variants for" -// + "which CADD socores will be returned, e.g. " -// + "19:45411941:T:C,14:38679764:-:GATCTG,1:6635210:G:-," -// + "2:114340663:GCTGGGCATCCT:ACTGGGCATCCT", -// required = true) String variants) { -// try { -// parseQueryParams(); -// VariantDBAdaptor variantDBAdaptor = dbAdaptorFactory.getVariationDBAdaptor(this.species, this.assembly); -// -// List> functionalScoreVariant = -// variantDBAdaptor.getFunctionalScoreVariant(Variant.parseVariants(variants), queryOptions); -// return createOkResponse(functionalScoreVariant); -// } catch (Exception e) { -// return createErrorResponse(e); -// } -// } - // @GET // @Path("/stats") // @Override @@ -497,9 +460,8 @@ public Response getAllConsequenceTypes() { @GET @Path("/snp") - @ApiOperation(httpMethod = "GET", value = "Get SNPs", - response = Snp.class, responseContainer = "QueryResponse") - @ApiImplicitParams({ + @ApiOperation(httpMethod = "GET", value = "Get SNPs", response = Snp.class, responseContainer = "QueryResponse") + @ApiImplicitParams({ @ApiImplicitParam(name = "exclude", value = EXCLUDE_DESCRIPTION, required = false, dataType = "java.util.List", paramType = "query"), @ApiImplicitParam(name = "include", value = INCLUDE_DESCRIPTION, @@ -510,15 +472,14 @@ public Response getAllConsequenceTypes() { required = false, dataType = "java.util.List", paramType = "query", defaultValue = "", allowableValues="ASCENDING,DESCENDING"), @ApiImplicitParam(name = "limit", value = LIMIT_DESCRIPTION, - required = false, defaultValue = DEFAULT_LIMIT, dataType = "java.util.List", - paramType = "query"), + required = false, defaultValue = DEFAULT_LIMIT, dataType = "java.util.List", paramType = "query"), @ApiImplicitParam(name = "skip", value = SKIP_DESCRIPTION, - required = false, defaultValue = DEFAULT_SKIP, dataType = "java.util.List", - paramType = "query") + required = false, defaultValue = DEFAULT_SKIP, dataType = "java.util.List", paramType = "query") }) public Response getSnps(@QueryParam("id") @ApiParam(name = "id", value = "ID") String id, - @QueryParam("chromosome") @ApiParam(name = "chromosome", value = "Chromsome") String chromosome, - @QueryParam("position") @ApiParam(name = "position", value = "Position") Integer position) { + @QueryParam("chromosome") @ApiParam(name = "chromosome", value = "Chromosome") String chromosome, + @QueryParam("position") @ApiParam(name = "position", value = "Position") Integer position, + @QueryParam("reference") @ApiParam(name = "reference", value = "Reference") String reference) { try { SnpQuery query = new SnpQuery(uriParams); CellBaseDataResult queryResult = variantManager.getSnps(query); @@ -528,58 +489,4 @@ public Response getSnps(@QueryParam("id") @ApiParam(name = "id", value = "ID") S } } - // FIXME: 29/04/16 GET and POST web services to be fixed -// @GET -// @Path("/{variants}/consequenceType") -// @ApiOperation(httpMethod = "GET", value = "Get the biological impact of the variant(s)", response = String.class, -// responseContainer = "QueryResponse") -// public Response getConsequenceTypeByGetMethod(@PathParam("variants") String variants) { -// return getConsequenceType(variants); -// } -// -// private Response getConsequenceType(String variants) { -// try { -// parseQueryParams(); -// VariantDBAdaptor variationDBAdaptor = dbAdaptorFactory.getVariationDBAdaptor(this.species, this.assembly); -// query.put(VariantDBAdaptor.QueryParams.ID.key(), variants); -// queryOptions.put(QueryOptions.INCLUDE, "annotation.displayConsequenceType"); -// CellBaseDataResult queryResult = variationDBAdaptor.get(query, queryOptions); -// CellBaseDataResult queryResult1 = new CellBaseDataResult<>( -// queryResult.getId(), queryResult.getTime(), queryResult.getEvents(), queryResult.getNumResults(), -// Collections.singletonList(queryResult.getResults().get(0).getAnnotation().getDisplayConsequenceType()), 1); -// return createOkResponse(queryResult1); -// } catch (Exception e) { -// return createErrorResponse("getConsequenceTypeByPostMethod", e.toString()); -// } -// } - - // FIXME: 29/04/16 GET and POST methods to be fixed -// @GET -// @Path("/{variants}/regulatory") -// @ApiOperation(httpMethod = "GET", value = "Get the regulatory impact of the variant(s)", hidden = true) -// public Response getRegulatoryByGetMethod(@PathParam("variants") String variants) { -// return getRegulatoryType(variants); -// } -// -// private Response getRegulatoryType(String variants) { -// try { -// parseQueryParams(); -// VariantDBAdaptor variationDBAdaptor = dbAdaptorFactory.getVariationDBAdaptor(this.species, this.assembly); -// return null; -// } catch (Exception e) { -// return createErrorResponse(e); -// } -// } - -// @GET -// @Path("/{variants}/sequence") -// @ApiOperation(httpMethod = "GET", value = "Get the adjacent sequence to the SNP(s) - Not yet implemented", -// hidden = true) -// public Response getSequence(@PathParam("variants") String query) { -// try { -// return null; -// } catch (Exception e) { -// return createErrorResponse(e); -// } -// } } From bbccffe75ac1b2b6029858be91ca21ef51f303c0 Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 12 Mar 2024 02:34:45 +0000 Subject: [PATCH 11/26] Final dbSNP builder implemented --- .../cellbase/lib/builders/DbSnpBuilder.java | 92 ++++++++++++++----- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java index f03d9540b9..788bbdade4 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java @@ -28,9 +28,7 @@ import java.io.BufferedReader; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; import static org.opencb.cellbase.lib.EtlCommons.DBSNP_NAME; @@ -39,8 +37,37 @@ */ public class DbSnpBuilder extends CellBaseBuilder { - private Path sourceVariationPath; - private DownloadProperties.URLProperties dbSnpUrlProperties; + private final Path sourceVariationPath; + private final DownloadProperties.URLProperties dbSnpUrlProperties; + private static final Map CHROMOSOME_MAPPING; + + static { + CHROMOSOME_MAPPING = new HashMap<>(); + CHROMOSOME_MAPPING.put("NC_000001", "1"); + CHROMOSOME_MAPPING.put("NC_000002", "2"); + CHROMOSOME_MAPPING.put("NC_000003", "3"); + CHROMOSOME_MAPPING.put("NC_000004", "4"); + CHROMOSOME_MAPPING.put("NC_000005", "5"); + CHROMOSOME_MAPPING.put("NC_000006", "6"); + CHROMOSOME_MAPPING.put("NC_000007", "7"); + CHROMOSOME_MAPPING.put("NC_000008", "8"); + CHROMOSOME_MAPPING.put("NC_000009", "9"); + CHROMOSOME_MAPPING.put("NC_000010", "10"); + CHROMOSOME_MAPPING.put("NC_000011", "11"); + CHROMOSOME_MAPPING.put("NC_000012", "12"); + CHROMOSOME_MAPPING.put("NC_000013", "13"); + CHROMOSOME_MAPPING.put("NC_000014", "14"); + CHROMOSOME_MAPPING.put("NC_000015", "15"); + CHROMOSOME_MAPPING.put("NC_000016", "16"); + CHROMOSOME_MAPPING.put("NC_000017", "17"); + CHROMOSOME_MAPPING.put("NC_000018", "18"); + CHROMOSOME_MAPPING.put("NC_000019", "19"); + CHROMOSOME_MAPPING.put("NC_000020", "20"); + CHROMOSOME_MAPPING.put("NC_000021", "21"); + CHROMOSOME_MAPPING.put("NC_000022", "22"); + CHROMOSOME_MAPPING.put("NC_000023", "X"); + CHROMOSOME_MAPPING.put("NC_000024", "Y"); + } public DbSnpBuilder(Path sourceVariationPath, DownloadProperties.URLProperties dbSnpUrlProperties, CellBaseSerializer serializer) { super(serializer); @@ -80,39 +107,57 @@ public void parse() throws Exception { String line; String[] fields; - String currentChromosome = null; - String chromosome = null; + String chromosome; int position; String id; String ref; String[] alt; + String type; + String version; String info; List flags; + Map additionalAttributes; + + SnpAnnotation snpAnnotation; try (BufferedReader bufferedReader = FileUtils.newBufferedReader(dbSnpFilePath)) { while ((line = bufferedReader.readLine()) != null) { if (!line.startsWith("#")) { fields = line.split("\t"); - // This only happens the first time, when we start reading the file - if (chromosome == null) { - logger.info("Parsing chr {} ", fields[0]); - currentChromosome = fields[0]; - chromosome = fields[0].split("\\.")[0]; - } - + chromosome = fields[0].split("\\.")[0]; + chromosome = CHROMOSOME_MAPPING.get(chromosome); position = Integer.parseInt(fields[1]); id = fields[2]; ref = fields[3]; alt = fields[4].split(","); + version = dbSnpUrlProperties.getVersion(); info = fields[7]; - String[] infoFields = info.split(";"); + // Calculate SNP type + type = "SNV"; + if (ref.length() > 1) { + type = "INDEL"; + } else { + for (String altAllele : alt) { + if (altAllele.length() > 1) { + type = "INDEL"; + break; + } + } + } + + snpAnnotation = new SnpAnnotation(); flags = new ArrayList<>(); - SnpAnnotation snpAnnotation = new SnpAnnotation(); + additionalAttributes = new HashMap<>(); + + String[] infoFields = info.split(";"); for (String infoField : infoFields) { String[] infoKeyValue = infoField.split("="); switch (infoKeyValue[0]) { + case "dbSNPBuildID": + version = infoKeyValue[1]; + break; case "GENEINFO": { snpAnnotation.setGene(infoKeyValue[1].split(":")[0]); break; @@ -121,6 +166,8 @@ public void parse() throws Exception { String[] studies = infoKeyValue[1].split("\\|"); List populationFrequencies = new ArrayList<>(); for (String study : studies) { + // After splitting 'GnomAD:1,1.426e-05,.' we get: + // freqFields: [GnomAD, 1, 1.426e-05, .] String[] freqFields = study.split("[:,]"); if (freqFields.length == alt.length + 2) { for (int i = 0; i < alt.length; i++) { @@ -129,12 +176,9 @@ public void parse() throws Exception { alt[i], freqFields[0]); } else { PopulationFrequency populationFrequency = new PopulationFrequency(); - // Set study populationFrequency.setStudy(freqFields[0]); - // Set reference populationFrequency.setRefAllele(ref); populationFrequency.setRefAlleleFreq(Float.parseFloat(freqFields[1])); - // Set alternate populationFrequency.setAltAllele(alt[i]); populationFrequency.setAltAlleleFreq(Float.parseFloat(freqFields[i + 2])); @@ -153,18 +197,16 @@ public void parse() throws Exception { default: { if (infoKeyValue.length == 1) { flags.add(infoKeyValue[0]); + } else { + additionalAttributes.put(infoKeyValue[0], infoKeyValue[1]); } } } } snpAnnotation.setFlags(flags); + snpAnnotation.setAdditionalAttributes(additionalAttributes); - if (!currentChromosome.equals(fields[0])) { - logger.info("Parsing chr {} ", fields[0]); - } - - Snp snp = new Snp(id, chromosome, position, ref, Arrays.asList(alt), "SNV", DBSNP_NAME, dbSnpUrlProperties.getVersion(), - snpAnnotation); + Snp snp = new Snp(id, chromosome, position, ref, Arrays.asList(alt), type, DBSNP_NAME, version, snpAnnotation); fileSerializer.serialize(snp, DBSNP_NAME); } } From 692cad2d4fde435a6ba094f8cc9c551ba41b7e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 12 Mar 2024 11:31:04 +0100 Subject: [PATCH 12/26] lib: update VariantMongoDBAdaptor according to the SNP biodata changes, #TASK-5820, #TASK-5789 --- .../opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java index 2b1a04c694..1861fab024 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java @@ -814,8 +814,8 @@ private List getVariantIds(List ids, int dataRelease) throws Cel Set results = new HashSet<>(); if (snpDataResult.getNumResults() > 0) { for (Snp snp : snpDataResult.getResults()) { - for (String allele : snp.getAlleles()) { - results.add(snp.getChromosome() + ":" + snp.getPosition() + ":" + snp.getReference() + ":" + allele); + for (String alternate : snp.getAlternates()) { + results.add(snp.getChromosome() + ":" + snp.getPosition() + ":" + snp.getReference() + ":" + alternate); } } } From 90a4f6803d550d8bc1648bda3a4ce05285c08cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 12 Mar 2024 11:31:50 +0100 Subject: [PATCH 13/26] lib: fix NPE, #TASK-5816, #TASK-5789 --- .../org/opencb/cellbase/lib/builders/DbSnpBuilder.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java index 788bbdade4..4f128562e6 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java @@ -171,9 +171,12 @@ public void parse() throws Exception { String[] freqFields = study.split("[:,]"); if (freqFields.length == alt.length + 2) { for (int i = 0; i < alt.length; i++) { - if (".".equals(freqFields[i + 2])) { - logger.warn("Skipping pop. frequency for alt. allele ({}) of study {}: it is '.')", - alt[i], freqFields[0]); + if (".".equals(freqFields[1])) { + logger.warn("Skipping pop. frequency for ref. allele ({}) of study {}: it is '.')", + ref, freqFields[0]); + } else if (".".equals(freqFields[i + 2])) { + logger.warn("Skipping pop. frequency for alt. allele ({}) of study {}: it is '.')", + alt[i], freqFields[0]); } else { PopulationFrequency populationFrequency = new PopulationFrequency(); populationFrequency.setStudy(freqFields[0]); From 36c64fe125014be39ca219e8ffeaa2f62f46685d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 13 Mar 2024 11:53:35 +0100 Subject: [PATCH 14/26] server: add the endpoints variant/snp/search and variant/snp/startsWith, #TASK-5820, #TASK-5789 --- .../lib/impl/core/SnpMongoDBAdaptor.java | 10 ++---- .../cellbase/lib/managers/VariantManager.java | 6 +++- .../server/rest/genomic/VariantWSServer.java | 32 +++++++++++++++++-- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java index 5a7eb2cb88..56757d5ead 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java @@ -111,15 +111,11 @@ public CellBaseDataResult groupBy(SnpQuery query) throws CellBaseException { public CellBaseDataResult startsWith(String id, QueryOptions options, int dataRelease) throws CellBaseException { Bson regex = Filters.regex("id", Pattern.compile("^" + id)); - Bson projection; + Bson projection = null; if (options.containsKey(QueryOptions.INCLUDE)) { projection = Projections.include(options.getAsStringList(QueryOptions.INCLUDE)); - } else { - if (options.containsKey(QueryOptions.EXCLUDE)) { - projection = Projections.exclude(options.getAsStringList(QueryOptions.EXCLUDE)); - } else { - projection = Projections.exclude("annotation"); - } + } else if (options.containsKey(QueryOptions.EXCLUDE)) { + projection = Projections.exclude(options.getAsStringList(QueryOptions.EXCLUDE)); } MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java index 2b90d40ebd..670585204d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java @@ -355,7 +355,11 @@ public CellBaseDataResult getFunctionalScoreRegion(List(chunkIdSet), options, dataRelease); } - public CellBaseDataResult getSnps(SnpQuery query) throws CellBaseException { + public CellBaseDataResult searchSnp(SnpQuery query) throws CellBaseException { return snpDBAdaptor.query(query); } + + public CellBaseDataResult startsWithSnp(String id, QueryOptions options, int dataRelease) throws CellBaseException { + return snpDBAdaptor.startsWith(id, options, dataRelease); + } } diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java index 3bb57515a5..6792ebc401 100755 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java @@ -459,7 +459,7 @@ public Response getAllConsequenceTypes() { } @GET - @Path("/snp") + @Path("/snp/search") @ApiOperation(httpMethod = "GET", value = "Get SNPs", response = Snp.class, responseContainer = "QueryResponse") @ApiImplicitParams({ @ApiImplicitParam(name = "exclude", value = EXCLUDE_DESCRIPTION, @@ -476,17 +476,43 @@ public Response getAllConsequenceTypes() { @ApiImplicitParam(name = "skip", value = SKIP_DESCRIPTION, required = false, defaultValue = DEFAULT_SKIP, dataType = "java.util.List", paramType = "query") }) - public Response getSnps(@QueryParam("id") @ApiParam(name = "id", value = "ID") String id, + public Response searchSnp(@QueryParam("id") @ApiParam(name = "id", value = "SNP ID") String id, @QueryParam("chromosome") @ApiParam(name = "chromosome", value = "Chromosome") String chromosome, @QueryParam("position") @ApiParam(name = "position", value = "Position") Integer position, @QueryParam("reference") @ApiParam(name = "reference", value = "Reference") String reference) { try { SnpQuery query = new SnpQuery(uriParams); - CellBaseDataResult queryResult = variantManager.getSnps(query); + CellBaseDataResult queryResult = variantManager.searchSnp(query); return createOkResponse(queryResult); } catch (Exception e) { return createErrorResponse(e); } } + @GET + @Path("/snp/startsWith") + @ApiOperation(httpMethod = "GET", value = "Get SNPs starting with the input SNP ID", response = Snp.class, + responseContainer = "QueryResponse") + @ApiImplicitParams({ + @ApiImplicitParam(name = "exclude", value = EXCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "include", value = INCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "limit", value = LIMIT_DESCRIPTION, + required = false, defaultValue = DEFAULT_LIMIT, dataType = "java.util.List", + paramType = "query") + }) + public Response startsWithSnp(@QueryParam("id") @ApiParam(name = "id", value = "SNP ID, e.g.: rs15703916") String id) { + try { + try { + SnpQuery query = new SnpQuery(uriParams); + CellBaseDataResult queryResult = variantManager.startsWithSnp(id, query.toQueryOptions(), getDataRelease()); + return createOkResponse(queryResult); + } catch (Exception e) { + return createErrorResponse(e); + } + } catch (Exception e) { + return createErrorResponse(e); + } + } } From 7670702515e7d34f49cb35537a3e294db652a0b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 13 Mar 2024 13:23:09 +0100 Subject: [PATCH 15/26] server: update variant annotator to return the dbSNP IDs in the field annotation.xrefs, #TASK-5821, #TASK-5789 --- .../opencb/cellbase/core/api/SnpQuery.java | 6 +- .../VariantAnnotationCalculator.java | 86 +++----------- .../futures/FutureSnpAnnotator.java | 105 +++++++++++++++++ .../futures/FutureSpliceScoreAnnotator.java | 109 ++++++++++++++++++ 4 files changed, 232 insertions(+), 74 deletions(-) create mode 100644 cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java create mode 100644 cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSpliceScoreAnnotator.java diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java index 94fb961bff..ade217f387 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/SnpQuery.java @@ -30,7 +30,7 @@ public class SnpQuery extends AbstractQuery { @QueryParameter(id = "chromosome") private String chromosome; @QueryParameter(id = "position") - private String position; + private Integer position; @QueryParameter(id = "reference") private String reference; @@ -76,11 +76,11 @@ public SnpQuery setChromosome(String chromosome) { return this; } - public String getPosition() { + public Integer getPosition() { return position; } - public SnpQuery setPosition(String position) { + public SnpQuery setPosition(Integer position) { this.position = position; return this; } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index a503ba7045..5b8444e6fe 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -40,6 +40,8 @@ import org.opencb.cellbase.lib.managers.*; import org.opencb.cellbase.lib.variant.VariantAnnotationUtils; import org.opencb.cellbase.lib.variant.annotation.futures.FuturePharmacogenomicsAnnotator; +import org.opencb.cellbase.lib.variant.annotation.futures.FutureSnpAnnotator; +import org.opencb.cellbase.lib.variant.annotation.futures.FutureSpliceScoreAnnotator; import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator; import org.opencb.commons.datastore.core.QueryOptions; import org.slf4j.Logger; @@ -465,6 +467,13 @@ private List runAnnotationProcess(List normalizedVar variationFuture = CACHED_THREAD_POOL.submit(futureVariationAnnotator); } + FutureSnpAnnotator futureSnpAnnotator = null; + Future>> snpFuture = null; + if (annotatorSet.contains("xrefs")) { + futureSnpAnnotator = new FutureSnpAnnotator(normalizedVariantList, dataRelease.getRelease(), variantManager, logger); + snpFuture = CACHED_THREAD_POOL.submit(futureSnpAnnotator); + } + FutureConservationAnnotator futureConservationAnnotator = null; Future>> conservationFuture = null; if (annotatorSet.contains("conservation")) { @@ -510,8 +519,8 @@ private List runAnnotationProcess(List normalizedVar FutureSpliceScoreAnnotator futureSpliceScoreAnnotator = null; Future>> spliceScoreFuture = null; if (annotatorSet.contains("consequenceType")) { - futureSpliceScoreAnnotator = new FutureSpliceScoreAnnotator(normalizedVariantList, QueryOptions.empty(), - dataRelease.getRelease()); + futureSpliceScoreAnnotator = new FutureSpliceScoreAnnotator(normalizedVariantList, dataRelease.getRelease(), apiKey, + variantManager, logger); spliceScoreFuture = CACHED_THREAD_POOL.submit(futureSpliceScoreAnnotator); } @@ -643,6 +652,9 @@ private List runAnnotationProcess(List normalizedVar if (futureVariationAnnotator != null) { futureVariationAnnotator.processResults(variationFuture, variantAnnotationList, annotatorSet); } + if (futureSnpAnnotator != null) { + futureSnpAnnotator.processResults(snpFuture, variantAnnotationList); + } if (futureConservationAnnotator != null) { futureConservationAnnotator.processResults(conservationFuture, variantAnnotationList); } @@ -1171,7 +1183,7 @@ private Set getAnnotatorSet(QueryOptions queryOptions) { // 'expression' removed in CB 5.0 annotatorSet = new HashSet<>(Arrays.asList("variation", "traitAssociation", "conservation", "functionalScore", "consequenceType", "geneDisease", "drugInteraction", "geneConstraints", "mirnaTargets", "pharmacogenomics", - "cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs")); + "cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs", "xrefs")); List excludeList = queryOptions.getAsStringList("exclude"); excludeList.forEach(annotatorSet::remove); } @@ -1909,74 +1921,6 @@ public void processResults(Future>> cytobandFu } } - class FutureSpliceScoreAnnotator implements Callable>> { - private List variantList; - private QueryOptions queryOptions; - private int dataRelease; - - FutureSpliceScoreAnnotator(List variantList, QueryOptions queryOptions, int dataRelease) { - this.variantList = variantList; - this.queryOptions = queryOptions; - this.dataRelease = dataRelease; - } - - @Override - public List> call() throws Exception { - long startTime = System.currentTimeMillis(); - - List> cellBaseDataResultList = new ArrayList<>(variantList.size()); - - logger.debug("Query splice"); - // Want to return only one CellBaseDataResult object per Variant - for (Variant variant : variantList) { - cellBaseDataResultList.add(variantManager.getSpliceScoreVariant(variant, apiKey, dataRelease)); - } - logger.debug("Splice score query performance is {}ms for {} variants", System.currentTimeMillis() - startTime, - variantList.size()); - return cellBaseDataResultList; - } - - public void processResults(Future>> spliceFuture, - List variantAnnotationList) - throws InterruptedException, ExecutionException { - List> spliceCellBaseDataResults; - try { - spliceCellBaseDataResults = spliceFuture.get(30, TimeUnit.SECONDS); - } catch (TimeoutException e) { - spliceFuture.cancel(true); - throw new ExecutionException("Unable to finish splice score query on time", e); - } - - if (CollectionUtils.isNotEmpty(spliceCellBaseDataResults)) { - for (int i = 0; i < variantAnnotationList.size(); i++) { - CellBaseDataResult spliceScoreResult = spliceCellBaseDataResults.get(i); - if (spliceScoreResult != null && CollectionUtils.isNotEmpty(spliceScoreResult.getResults())) { - for (SpliceScore spliceScore : spliceScoreResult.getResults()) { - for (ConsequenceType ct : variantAnnotationList.get(i).getConsequenceTypes()) { - for (SpliceScoreAlternate spliceScoreAlt : spliceScore.getAlternates()) { - String alt = StringUtils.isEmpty(variantAnnotationList.get(i).getAlternate()) - ? "-" - : variantAnnotationList.get(i).getAlternate(); - if (alt.equals(spliceScoreAlt.getAltAllele())) { - if (StringUtils.isEmpty(spliceScore.getTranscriptId()) - || StringUtils.isEmpty(ct.getTranscriptId()) - || spliceScore.getTranscriptId().equals(ct.getTranscriptId())) { - SpliceScores scores = new SpliceScores(spliceScore.getSource(), spliceScoreAlt.getScores()); - if (ct.getSpliceScores() == null) { - ct.setSpliceScores(new ArrayList<>()); - } - ct.getSpliceScores().add(scores); - } - } - } - } - } - } - } - } - } - } - public VariantNormalizer getNormalizer() { return normalizer; } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java new file mode 100644 index 0000000000..bc982d6587 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java @@ -0,0 +1,105 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.variant.annotation.futures; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.Snp; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.biodata.models.variant.avro.Xref; +import org.opencb.cellbase.core.api.SnpQuery; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.managers.VariantManager; +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.*; + +public class FutureSnpAnnotator implements Callable>> { + private VariantManager variantManager; + + private List variantList; + private int dataRelease; + + private Logger logger; + + public FutureSnpAnnotator(List variantList, int dataRelease, VariantManager variantManager, Logger logger) { + this.variantManager = variantManager; + + this.variantList = variantList; + this.dataRelease = dataRelease; + + this.logger = logger; + } + + @Override + public List> call() throws Exception { + long startTime = System.currentTimeMillis(); + + List> cellBaseDataResultList = new ArrayList<>(variantList.size()); + + logger.debug("SNP queries..."); + // Want to return only one CellBaseDataResult object per Variant + List includes = new ArrayList<>(); + includes.add("id"); + includes.add("source"); + String logMsg = StringUtils.join(includes, ","); + logger.info("SNP annotation/search includes: {}", logMsg); + for (Variant variant : variantList) { + SnpQuery query = new SnpQuery(); + query.setChromosome(variant.getChromosome()); + query.setPosition(variant.getStart()); + query.setReference(variant.getReference()); + query.setDataRelease(dataRelease); + query.setIncludes(includes); + cellBaseDataResultList.add(variantManager.searchSnp(query)); + } + logger.info("SNP queries performance in {} ms for {} variants", System.currentTimeMillis() - startTime, variantList.size()); + return cellBaseDataResultList; + } + + public void processResults(Future>> snpFuture, List variantAnnotationList) + throws InterruptedException, ExecutionException { + List> snpCellBaseDataResults; + try { + snpCellBaseDataResults = snpFuture.get(30, TimeUnit.SECONDS); + } catch (TimeoutException e) { + snpFuture.cancel(true); + throw new ExecutionException("Unable to finish SNP query on time", e); + } + + if (CollectionUtils.isNotEmpty(snpCellBaseDataResults)) { + for (int i = 0; i < variantAnnotationList.size(); i++) { + CellBaseDataResult snpResult = snpCellBaseDataResults.get(i); + if (snpResult != null && CollectionUtils.isNotEmpty(snpResult.getResults())) { + List xrefs = new ArrayList<>(); + for (Snp snp : snpResult.getResults()) { + xrefs.add(new Xref(snp.getId(), snp.getSource())); + } + if (CollectionUtils.isNotEmpty(xrefs)) { + if (variantAnnotationList.get(i).getXrefs() == null) { + variantAnnotationList.get(i).setXrefs(new ArrayList<>()); + } + variantAnnotationList.get(i).getXrefs().addAll(xrefs); + } + } + } + } + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSpliceScoreAnnotator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSpliceScoreAnnotator.java new file mode 100644 index 0000000000..40523fdbc8 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSpliceScoreAnnotator.java @@ -0,0 +1,109 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.variant.annotation.futures; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.SpliceScore; +import org.opencb.biodata.models.core.SpliceScoreAlternate; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.ConsequenceType; +import org.opencb.biodata.models.variant.avro.SpliceScores; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.managers.VariantManager; +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.*; + +public class FutureSpliceScoreAnnotator implements Callable>> { + private List variantList; + private int dataRelease; + private String apiKey; + + private VariantManager variantManager; + + private Logger logger; + + public FutureSpliceScoreAnnotator(List variantList, int dataRelease, String apiKey, VariantManager variantManager, + Logger logger) { + this.variantList = variantList; + this.dataRelease = dataRelease; + this.apiKey = apiKey; + + this.variantManager = variantManager; + + this.logger = logger; + } + + @Override + public List> call() throws Exception { + long startTime = System.currentTimeMillis(); + + List> cellBaseDataResultList = new ArrayList<>(variantList.size()); + + logger.debug("Query splice"); + // Want to return only one CellBaseDataResult object per Variant + for (Variant variant : variantList) { + cellBaseDataResultList.add(variantManager.getSpliceScoreVariant(variant, apiKey, dataRelease)); + } + logger.debug("Splice score query performance is {}ms for {} variants", System.currentTimeMillis() - startTime, + variantList.size()); + return cellBaseDataResultList; + } + + public void processResults(Future>> spliceFuture, List variantAnnotationList) + throws InterruptedException, ExecutionException { + List> spliceCellBaseDataResults; + try { + spliceCellBaseDataResults = spliceFuture.get(30, TimeUnit.SECONDS); + } catch (TimeoutException e) { + spliceFuture.cancel(true); + throw new ExecutionException("Unable to finish splice score query on time", e); + } + + if (CollectionUtils.isNotEmpty(spliceCellBaseDataResults)) { + for (int i = 0; i < variantAnnotationList.size(); i++) { + CellBaseDataResult spliceScoreResult = spliceCellBaseDataResults.get(i); + if (spliceScoreResult != null && CollectionUtils.isNotEmpty(spliceScoreResult.getResults())) { + for (SpliceScore spliceScore : spliceScoreResult.getResults()) { + for (ConsequenceType ct : variantAnnotationList.get(i).getConsequenceTypes()) { + for (SpliceScoreAlternate spliceScoreAlt : spliceScore.getAlternates()) { + String alt = StringUtils.isEmpty(variantAnnotationList.get(i).getAlternate()) + ? "-" + : variantAnnotationList.get(i).getAlternate(); + if (alt.equals(spliceScoreAlt.getAltAllele())) { + if (StringUtils.isEmpty(spliceScore.getTranscriptId()) + || StringUtils.isEmpty(ct.getTranscriptId()) + || spliceScore.getTranscriptId().equals(ct.getTranscriptId())) { + SpliceScores scores = new SpliceScores(spliceScore.getSource(), spliceScoreAlt.getScores()); + if (ct.getSpliceScores() == null) { + ct.setSpliceScores(new ArrayList<>()); + } + ct.getSpliceScores().add(scores); + } + } + } + } + } + } + } + } + } +} From f206fe5064bacacd931983fbc955111552103dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 13 Mar 2024 15:11:55 +0100 Subject: [PATCH 16/26] lib: the variant annotation calculator search SNP ids only if the collection snp exists, #TASK-5821, #TASK-5789 --- .../lib/variant/annotation/VariantAnnotationCalculator.java | 2 +- .../opencb/cellbase/server/rest/genomic/VariantWSServer.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index 5b8444e6fe..1b86b49367 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -469,7 +469,7 @@ private List runAnnotationProcess(List normalizedVar FutureSnpAnnotator futureSnpAnnotator = null; Future>> snpFuture = null; - if (annotatorSet.contains("xrefs")) { + if (annotatorSet.contains("xrefs") && dataRelease.getCollections().containsKey(EtlCommons.SNP_COLLECTION_NAME)) { futureSnpAnnotator = new FutureSnpAnnotator(normalizedVariantList, dataRelease.getRelease(), variantManager, logger); snpFuture = CACHED_THREAD_POOL.submit(futureSnpAnnotator); } diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java index 6792ebc401..03e9f515f2 100755 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java @@ -458,6 +458,9 @@ public Response getAllConsequenceTypes() { } } + //------------------------------------------------------------------------- + // S N P + //------------------------------------------------------------------------- @GET @Path("/snp/search") @ApiOperation(httpMethod = "GET", value = "Get SNPs", response = Snp.class, responseContainer = "QueryResponse") From 2dbd3b4c50ee0eed25fa69a90d78a97ba807a405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 15 Mar 2024 16:44:29 +0100 Subject: [PATCH 17/26] lib: normalize variants created by SNPs before searching in the variation collection, #TASK-5820, #TASK-5789 --- .../cellbase/core/exception/CellBaseException.java | 3 +++ .../lib/impl/core/VariantMongoDBAdaptor.java | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java index 884c63f2ae..a3b54942d5 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java @@ -22,5 +22,8 @@ public CellBaseException(String msg) { super(msg); } + public CellBaseException(String msg, Exception e) { + super(msg, e); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java index 1861fab024..e598ead901 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java @@ -30,6 +30,8 @@ import org.opencb.biodata.models.variant.avro.Score; import org.opencb.biodata.models.variant.avro.StructuralVariantType; import org.opencb.biodata.models.variant.avro.VariantType; +import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; +import org.opencb.biodata.tools.variant.VariantNormalizer; import org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.api.VariantQuery; import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; @@ -813,9 +815,18 @@ private List getVariantIds(List ids, int dataRelease) throws Cel // 3. Build the variant IDs Set results = new HashSet<>(); if (snpDataResult.getNumResults() > 0) { + // Create variant normalizer + VariantNormalizer variantNormalizer = new VariantNormalizer(); + for (Snp snp : snpDataResult.getResults()) { for (String alternate : snp.getAlternates()) { - results.add(snp.getChromosome() + ":" + snp.getPosition() + ":" + snp.getReference() + ":" + alternate); + Variant inputVariant = new Variant(snp.getChromosome(), snp.getPosition(), snp.getReference(), alternate); + try { + Variant normalizedVariant = variantNormalizer.normalize(Collections.singletonList(inputVariant), true).get(0); + results.add(normalizedVariant.toString()); + } catch (NonStandardCompliantSampleField e) { + throw new CellBaseException("Error normalizing variant " + inputVariant, e); + } } } } From 0b173e9a4a60d20472d20d2e10e20d56b76f6960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 15 Mar 2024 16:50:20 +0100 Subject: [PATCH 18/26] Fix some sonnar issues, #TASK-5789 --- .../lib/impl/core/VariantMongoDBAdaptor.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java index e598ead901..7be8cf75f1 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java @@ -115,10 +115,10 @@ public CellBaseDataResult update(List objectList, String field, String[] i CellBaseDataResult nLoadedObjects = null; switch (field) { case POP_FREQUENCIES_FIELD: - nLoadedObjects = updatePopulationFrequencies((List) objectList, dataRelease); + nLoadedObjects = updatePopulationFrequencies(objectList, dataRelease); break; case ANNOTATION_FIELD: - nLoadedObjects = updateAnnotation((List) objectList, innerFields, dataRelease); + nLoadedObjects = updateAnnotation(objectList, innerFields, dataRelease); break; default: logger.error("Invalid field {}: no action implemented for updating this field.", field); @@ -285,11 +285,13 @@ public Bson parseQuery(VariantQuery query) throws CellBaseException { break; case "ciStartLeft": createImprecisePositionQueryStart(query, andBsonList); + break; case "ciEndRight": // don't do anything, this is parsed later break; case "ciEndLeft": createImprecisePositionQueryEnd(query, andBsonList); + break; case "gene": createGeneOrQuery(query, andBsonList); break; @@ -359,7 +361,7 @@ private void createTypeQuery(VariantQuery query, String typeMongoField, String s andBsonList.add(Filters.or(orBsonList)); // Inversion or just CNV (without subtype) } else { - andBsonList.add(Filters.eq(typeMongoField, variantTypeString.toString())); + andBsonList.add(Filters.eq(typeMongoField, variantTypeString)); } } } @@ -450,7 +452,7 @@ private CellBaseDataResult updateAnnotation(List variantDocument for (Document variantDBObject : variantDocumentList) { Document annotationDBObject = (Document) variantDBObject.get(ANNOTATION_FIELD); Document toOverwrite = new Document(); - if (innerFields != null & innerFields.length > 0) { + if (innerFields != null && innerFields.length > 0) { for (String field : innerFields) { if (annotationDBObject.get(field) != null) { toOverwrite.put(ANNOTATION_FIELD + "." + field, annotationDBObject.get(field)); @@ -810,7 +812,8 @@ private List getVariantIds(List ids, int dataRelease) throws Cel // 2. We must exclude as much information as possible to improve performance MongoDBCollection mongoDBCollection = getCollectionByRelease(snpDBCollectionByRelease, dataRelease); - DataResult snpDataResult = mongoDBCollection.find(query, Projections.exclude("annotation"), Snp.class, new QueryOptions()); + DataResult snpDataResult = mongoDBCollection.find(query, Projections.exclude(ANNOTATION_FIELD), Snp.class, + new QueryOptions()); // 3. Build the variant IDs Set results = new HashSet<>(); From e5bfdabd3006fe89ff2ca6717d7515a301fe5990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 28 Mar 2024 11:19:28 +0100 Subject: [PATCH 19/26] server: fix endpoint /versions by returning the sources from the input data release, #TASK-5704 --- .../opencb/cellbase/core/ParamConstants.java | 4 +-- .../cellbase/server/rest/MetaWSServer.java | 34 +++++++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java index 3c2ca5791d..482324bdd1 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java @@ -113,8 +113,8 @@ public Type type() { public static final String VERSION_DESCRIPTION = "API version, e.g.: " + DEFAULT_VERSION; public static final String DATA_RELEASE_PARAM = "dataRelease"; - public static final String DATA_RELEASE_DESCRIPTION = "Data release. To use the default data release, set this to 0. To get the list" - + " of available data release, please call the endpoint 'meta/dataReleases'"; + public static final String DATA_RELEASE_DESCRIPTION = "Data release. To get the list of available data release, please call the" + + " endpoint 'meta/dataReleases'"; public static final String API_KEY_PARAM = "apiKey"; public static final String API_KEY_DESCRIPTION = "API key to allow access to licensed/restricted data sources such as" diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/MetaWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/MetaWSServer.java index 331f562585..d8bb3a9f6d 100644 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/MetaWSServer.java +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/MetaWSServer.java @@ -28,6 +28,7 @@ import org.opencb.cellbase.core.config.SpeciesProperties; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.models.DataRelease; +import org.opencb.cellbase.core.models.DataReleaseSource; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.core.utils.SpeciesUtils; import org.opencb.cellbase.lib.managers.DataReleaseManager; @@ -56,6 +57,9 @@ import java.text.SimpleDateFormat; import java.util.*; +import static org.opencb.cellbase.lib.EtlCommons.COSMIC_DATA; +import static org.opencb.cellbase.lib.EtlCommons.HGMD_DATA; + /** * Created by imedina on 04/08/15. */ @@ -84,13 +88,17 @@ public MetaWSServer(@PathParam("apiVersion") @GET @Path("/{species}/versions") @ApiOperation(httpMethod = "GET", value = "Returns source version metadata, including source urls from which " - + "data files were downloaded.", response = DownloadProperties.class, responseContainer = "QueryResponse") + + "data files were downloaded.", response = DataReleaseSource.class, responseContainer = "QueryResponse") public Response getVersion(@PathParam("species") @ApiParam(name = "species", value = ParamConstants.SPECIES_DESCRIPTION, defaultValue = ParamConstants.DEFAULT_SPECIES, required = true) String species, @ApiParam(name = "assembly", value = ParamConstants.ASSEMBLY_DESCRIPTION, - defaultValue = ParamConstants.DEFAULT_ASSEMBLY) @QueryParam("assembly") String assembly) { + defaultValue = ParamConstants.DEFAULT_ASSEMBLY) @QueryParam("assembly") String assembly, + @ApiParam(name = "dataRelease", value = ParamConstants.DATA_RELEASE_DESCRIPTION) @QueryParam("dataRelease") + int dataRelease) { try { + long dbTimeStart; + dbTimeStart = System.currentTimeMillis(); if (StringUtils.isEmpty(assembly)) { SpeciesConfiguration.Assembly assemblyObject = SpeciesUtils.getDefaultAssembly(cellBaseConfiguration, species); if (assemblyObject != null) { @@ -98,12 +106,24 @@ public Response getVersion(@PathParam("species") } } if (!SpeciesUtils.validateSpeciesAndAssembly(cellBaseConfiguration, species, assembly)) { - return createErrorResponse("getVersion", "Invalid species: '" + species + "' or assembly: '" + return createErrorResponse("/versions", "Invalid species: '" + species + "' or assembly: '" + assembly + "'"); } - logger.error("species " + species); - CellBaseDataResult queryResult = metaManager.getVersions(species, assembly); - return createOkResponse(queryResult); + DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(species, assembly); + DataRelease dr = dataReleaseManager.get(dataRelease); + if (dr == null) { + return createErrorResponse("/versions", "Could not find data release '" + dataRelease + "'"); + } + // Remove some sources + List sources = new ArrayList<>(); + for (DataReleaseSource source : dr.getSources()) { + if (!COSMIC_DATA.equalsIgnoreCase(source.getName()) && !HGMD_DATA.equalsIgnoreCase(source.getName())) { + sources.add(source); + } + } + int dbTime = Long.valueOf(System.currentTimeMillis() - dbTimeStart).intValue(); + return createOkResponse(new CellBaseDataResult<>("versions", dbTime, Collections.emptyList(), sources.size(), sources, + sources.size())); } catch (CellBaseException e) { return createErrorResponse(e); } @@ -135,7 +155,7 @@ public Response getDataRelease(@PathParam("species") } } if (!SpeciesUtils.validateSpeciesAndAssembly(cellBaseConfiguration, species, assembly)) { - return createErrorResponse("getVersion", "Invalid species: '" + species + "' or assembly: '" + return createErrorResponse("/dataReleases", "Invalid species: '" + species + "' or assembly: '" + assembly + "'"); } DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(species, assembly); From 94880c1b1e4a86039eb92172e83fa62d8fc13f19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 28 Mar 2024 11:51:58 +0100 Subject: [PATCH 20/26] core: fix typo, #TASK-5704 --- .../src/main/java/org/opencb/cellbase/core/ParamConstants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java index 482324bdd1..b056103910 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java @@ -113,7 +113,7 @@ public Type type() { public static final String VERSION_DESCRIPTION = "API version, e.g.: " + DEFAULT_VERSION; public static final String DATA_RELEASE_PARAM = "dataRelease"; - public static final String DATA_RELEASE_DESCRIPTION = "Data release. To get the list of available data release, please call the" + public static final String DATA_RELEASE_DESCRIPTION = "Data release. To get the list of available data releases, please call the" + " endpoint 'meta/dataReleases'"; public static final String API_KEY_PARAM = "apiKey"; From 8e5f1b4e647fa52d1fc7f162bd80d9f4e29994f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 12 Apr 2024 15:28:54 +0200 Subject: [PATCH 21/26] client: update VariantClient to take into account the dbSNP endpoints, #TASK-6020, #TASK-5789 --- .../cellbase/client/rest/VariantClient.java | 9 +++ .../client/rest/VariantClientTest.java | 79 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/VariantClient.java b/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/VariantClient.java index e9479ad18c..3e308e08d0 100644 --- a/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/VariantClient.java +++ b/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/VariantClient.java @@ -17,6 +17,7 @@ package org.opencb.cellbase.client.rest; import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.Snp; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.*; import org.opencb.cellbase.client.config.ClientConfiguration; @@ -236,6 +237,14 @@ public CellBaseDataResponse getAllConsequenceTypes(Query query) throws I return execute("consequenceTypes", query, new QueryOptions(), String.class); } + public CellBaseDataResponse searchSnp(Query query, QueryOptions options) throws IOException { + return execute("snp/search", query, options, Snp.class); + } + + public CellBaseDataResponse startsWithSnp(Query query, QueryOptions options) throws IOException { + return execute("snp/startsWith", query, options, Snp.class); + } + // public CellBaseDataResponse getConsequenceTypeById(String id, QueryOptions options) throws IOException { // return execute(id, "consequence_type", options, String.class); // } diff --git a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java index 082c056d40..c167c9461a 100644 --- a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java +++ b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java @@ -18,17 +18,25 @@ import org.apache.avro.specific.SpecificRecordBase; import org.apache.commons.collections4.CollectionUtils; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Test; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ArgumentsSource; +import org.opencb.biodata.models.core.Snp; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.ConsequenceType; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.cellbase.client.config.ClientConfiguration; +import org.opencb.cellbase.client.config.RestConfig; +import org.opencb.cellbase.core.common.GitRepositoryState; import org.opencb.cellbase.core.result.CellBaseDataResponse; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import javax.ws.rs.QueryParam; import java.util.*; import java.util.stream.Collectors; @@ -139,6 +147,77 @@ public void getAllConsequenceTypes(CellBaseClient cellBaseClient) throws Excepti assertNotNull(response.firstResult(), "List of all the consequence types present should be returned"); } + @Test + public void testSearchSnpBydbSnpId() throws Exception { + Assume.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); + + ClientConfiguration clientConfiguration = new ClientConfiguration() + .setDefaultSpecies("hsapiens") + .setVersion("v5.8.3-SNAPSHOT") + .setRest(new RestConfig(Collections.singletonList("https://ws.zettagenomics.com/cellbase"), 2000)); + + CellBaseClient client = new CellBaseClient(clientConfiguration); + + Query query = new Query(); + query.put("id", "rs1570391602,rs41278952"); + query.put("dataRelease", 7); + + CellBaseDataResponse response = client.getVariantClient().searchSnp(query, new QueryOptions()); + assertEquals(2, response.getResponses().get(0).getNumResults()); + assertEquals("rs1570391602", response.getResponses().get(0).getResults().get(0).getId()); + assertEquals("rs41278952", response.getResponses().get(0).getResults().get(1).getId()); + } + + @Test + public void testSearchSnpByPosition() throws Exception { + Assume.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); + + ClientConfiguration clientConfiguration = new ClientConfiguration() + .setDefaultSpecies("hsapiens") + .setVersion("v5.8.3-SNAPSHOT") + .setRest(new RestConfig(Collections.singletonList("https://ws.zettagenomics.com/cellbase"), 2000)); + + CellBaseClient client = new CellBaseClient(clientConfiguration); + + Query query = new Query(); + query.put("chromosome", "1"); + query.put("position", "56948509"); + query.put("reference", "T"); + query.put("dataRelease", 7); + + CellBaseDataResponse response = client.getVariantClient().searchSnp(query, new QueryOptions()); + assertEquals(1, response.getResponses().get(0).getNumResults()); + assertEquals("rs1570391602", response.getResponses().get(0).getResults().get(0).getId()); + assertEquals(query.getInt("position"), response.getResponses().get(0).getResults().get(0).getPosition()); + assertEquals(query.get("reference"), response.getResponses().get(0).getResults().get(0).getReference()); + assertEquals(1, response.getResponses().get(0).getResults().get(0).getAlternates().size()); + assertEquals("G", response.getResponses().get(0).getResults().get(0).getAlternates().get(0)); + } + + @Test + public void testStarsWithSnp() throws Exception { + Assume.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); + + ClientConfiguration clientConfiguration = new ClientConfiguration() + .setDefaultSpecies("hsapiens") + .setVersion("v5.8.3-SNAPSHOT") + .setRest(new RestConfig(Collections.singletonList("https://ws.zettagenomics.com/cellbase"), 2000)); + + CellBaseClient client = new CellBaseClient(clientConfiguration); + + Query query = new Query(); + query.put("id", "rs157039161"); + query.put("dataRelease", 7); + + CellBaseDataResponse response = client.getVariantClient().startsWithSnp(query, new QueryOptions()); + assertEquals(9, response.getResponses().get(0).getNumResults()); + for (Snp snp : response.getResponses().get(0).getResults()) { + if (!snp.getId().startsWith(query.getString("id"))) { + fail(); + } + } + } + // @Test // public void getConsequenceTypeById() throws Exception { // CellBaseDataResponse stringCellBaseDataResponse = cellBaseClient.getVariantClient().getConsequenceTypeById("22:35490160:G:A", null); From 377c0b7f4448036ee7d86511f5c53fdbe30d1caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 12 Apr 2024 15:39:35 +0200 Subject: [PATCH 22/26] client: use JUnit 5, #TASK-6020, #TASK-5789 --- .../cellbase/client/rest/VariantClientTest.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java index c167c9461a..1ae1e6d274 100644 --- a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java +++ b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java @@ -18,10 +18,9 @@ import org.apache.avro.specific.SpecificRecordBase; import org.apache.commons.collections4.CollectionUtils; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; +import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ArgumentsSource; @@ -36,7 +35,6 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import javax.ws.rs.QueryParam; import java.util.*; import java.util.stream.Collectors; @@ -149,7 +147,7 @@ public void getAllConsequenceTypes(CellBaseClient cellBaseClient) throws Excepti @Test public void testSearchSnpBydbSnpId() throws Exception { - Assume.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); + Assumptions.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); ClientConfiguration clientConfiguration = new ClientConfiguration() .setDefaultSpecies("hsapiens") @@ -170,7 +168,7 @@ public void testSearchSnpBydbSnpId() throws Exception { @Test public void testSearchSnpByPosition() throws Exception { - Assume.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); + Assumptions.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); ClientConfiguration clientConfiguration = new ClientConfiguration() .setDefaultSpecies("hsapiens") @@ -196,7 +194,7 @@ public void testSearchSnpByPosition() throws Exception { @Test public void testStarsWithSnp() throws Exception { - Assume.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); + Assumptions.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); ClientConfiguration clientConfiguration = new ClientConfiguration() .setDefaultSpecies("hsapiens") From 1777ca2a23981c9774782e4fb8ce834cd3bcc4ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 16 Apr 2024 10:45:09 +0200 Subject: [PATCH 23/26] lib: check and remove duplicated xrefs/dbSNP, #TASK-5821, #TASK-5789 --- .../futures/FutureSnpAnnotator.java | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java index bc982d6587..a14dd62e69 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FutureSnpAnnotator.java @@ -88,15 +88,33 @@ public void processResults(Future>> snpFuture, List for (int i = 0; i < variantAnnotationList.size(); i++) { CellBaseDataResult snpResult = snpCellBaseDataResults.get(i); if (snpResult != null && CollectionUtils.isNotEmpty(snpResult.getResults())) { - List xrefs = new ArrayList<>(); - for (Snp snp : snpResult.getResults()) { - xrefs.add(new Xref(snp.getId(), snp.getSource())); - } - if (CollectionUtils.isNotEmpty(xrefs)) { - if (variantAnnotationList.get(i).getXrefs() == null) { - variantAnnotationList.get(i).setXrefs(new ArrayList<>()); + if (CollectionUtils.isEmpty(variantAnnotationList.get(i).getXrefs())) { + // Add all dbSNP to the xrefs + variantAnnotationList.get(i).setXrefs(new ArrayList<>()); + for (Snp snp : snpResult.getResults()) { + variantAnnotationList.get(i).getXrefs().add(new Xref(snp.getId(), snp.getSource())); + } + } else { + // Check if the xrefs are already in the annotation (e.g., GWAS builder might add dbSNP IDs) + List newXrefs = new ArrayList<>(); + for (Snp snp : snpResult.getResults()) { + // Sanity check + if (StringUtils.isNotEmpty(snp.getId()) && StringUtils.isNotEmpty(snp.getSource())) { + boolean found = false; + for (Xref xref : variantAnnotationList.get(i).getXrefs()) { + if (snp.getId().equalsIgnoreCase(xref.getId()) && snp.getSource().equalsIgnoreCase(xref.getSource())) { + found = true; + break; + } + } + if (!found) { + newXrefs.add(new Xref(snp.getId(), snp.getSource())); + } + } + } + if (CollectionUtils.isNotEmpty(newXrefs)) { + variantAnnotationList.get(i).getXrefs().addAll(newXrefs); } - variantAnnotationList.get(i).getXrefs().addAll(xrefs); } } } From 30f167cf589dc747ab2505a12eeedf5228e974d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 17 Apr 2024 10:15:51 +0200 Subject: [PATCH 24/26] test: improve JUnit tests, and exception management, #TASK-6020, #TASK-5789 --- .../client/rest/VariantClientTest.java | 37 ++++++++++++++----- .../org/opencb/cellbase/lib/EtlCommons.java | 2 + .../download/VariationDownloadManager.java | 3 +- .../lib/impl/core/SnpMongoDBAdaptor.java | 4 +- .../lib/impl/core/VariantMongoDBAdaptor.java | 2 +- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java index 1ae1e6d274..9b39239538 100644 --- a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java +++ b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java @@ -18,6 +18,7 @@ import org.apache.avro.specific.SpecificRecordBase; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -31,9 +32,12 @@ import org.opencb.cellbase.client.config.ClientConfiguration; import org.opencb.cellbase.client.config.RestConfig; import org.opencb.cellbase.core.common.GitRepositoryState; +import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResponse; +import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.utils.VersionUtils; import java.util.*; import java.util.stream.Collectors; @@ -147,8 +151,7 @@ public void getAllConsequenceTypes(CellBaseClient cellBaseClient) throws Excepti @Test public void testSearchSnpBydbSnpId() throws Exception { - Assumptions.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); - + int dataRelease = 7; ClientConfiguration clientConfiguration = new ClientConfiguration() .setDefaultSpecies("hsapiens") .setVersion("v5.8.3-SNAPSHOT") @@ -156,9 +159,15 @@ public void testSearchSnpBydbSnpId() throws Exception { CellBaseClient client = new CellBaseClient(clientConfiguration); + // Assumptions before running the test + ObjectMap result = client.getMetaClient().about().firstResult(); + Assumptions.assumeTrue(VersionUtils.isMinVersion("5.8.3-SNAPSHOT", result.getString("Version"))); + CellBaseDataResponse dataReleaseResponse = client.getMetaClient().dataReleases(); + Assumptions.assumeTrue(dataReleaseResponse.getResponses().get(0).getResults().stream().map(DataRelease::getRelease).collect(Collectors.toList()).contains(dataRelease)); + Query query = new Query(); query.put("id", "rs1570391602,rs41278952"); - query.put("dataRelease", 7); + query.put("dataRelease", dataRelease); CellBaseDataResponse response = client.getVariantClient().searchSnp(query, new QueryOptions()); assertEquals(2, response.getResponses().get(0).getNumResults()); @@ -168,8 +177,7 @@ public void testSearchSnpBydbSnpId() throws Exception { @Test public void testSearchSnpByPosition() throws Exception { - Assumptions.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); - + int dataRelease = 7; ClientConfiguration clientConfiguration = new ClientConfiguration() .setDefaultSpecies("hsapiens") .setVersion("v5.8.3-SNAPSHOT") @@ -177,11 +185,17 @@ public void testSearchSnpByPosition() throws Exception { CellBaseClient client = new CellBaseClient(clientConfiguration); + // Assumptions before running the test + ObjectMap result = client.getMetaClient().about().firstResult(); + Assumptions.assumeTrue(VersionUtils.isMinVersion("5.8.3-SNAPSHOT", result.getString("Version"))); + CellBaseDataResponse dataReleaseResponse = client.getMetaClient().dataReleases(); + Assumptions.assumeTrue(dataReleaseResponse.getResponses().get(0).getResults().stream().map(DataRelease::getRelease).collect(Collectors.toList()).contains(dataRelease)); + Query query = new Query(); query.put("chromosome", "1"); query.put("position", "56948509"); query.put("reference", "T"); - query.put("dataRelease", 7); + query.put("dataRelease", dataRelease); CellBaseDataResponse response = client.getVariantClient().searchSnp(query, new QueryOptions()); assertEquals(1, response.getResponses().get(0).getNumResults()); @@ -194,8 +208,7 @@ public void testSearchSnpByPosition() throws Exception { @Test public void testStarsWithSnp() throws Exception { - Assumptions.assumeTrue(GitRepositoryState.get().getBranch().equals("TASK-5789")); - + int dataRelease = 7; ClientConfiguration clientConfiguration = new ClientConfiguration() .setDefaultSpecies("hsapiens") .setVersion("v5.8.3-SNAPSHOT") @@ -203,9 +216,15 @@ public void testStarsWithSnp() throws Exception { CellBaseClient client = new CellBaseClient(clientConfiguration); + // Assumptions before running the test + ObjectMap result = client.getMetaClient().about().firstResult(); + Assumptions.assumeTrue(VersionUtils.isMinVersion("5.8.3-SNAPSHOT", result.getString("Version"))); + CellBaseDataResponse dataReleaseResponse = client.getMetaClient().dataReleases(); + Assumptions.assumeTrue(dataReleaseResponse.getResponses().get(0).getResults().stream().map(DataRelease::getRelease).collect(Collectors.toList()).contains(dataRelease)); + Query query = new Query(); query.put("id", "rs157039161"); - query.put("dataRelease", 7); + query.put("dataRelease", dataRelease); CellBaseDataResponse response = client.getVariantClient().startsWithSnp(query, new QueryOptions()); assertEquals(9, response.getResponses().get(0).getNumResults()); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 8d5b4c55c4..edf41c1e11 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -35,6 +35,8 @@ */ public class EtlCommons { + public static final String HOMO_SAPIENS_NAME ="Homo sapiens"; + public static final String GENOME_DATA = "genome"; public static final String GENE_DATA = "gene"; public static final String REFSEQ_DATA = "refseq"; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java index 7f317d5f7b..7586505d21 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/VariationDownloadManager.java @@ -19,6 +19,7 @@ import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.DownloadProperties; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.lib.EtlCommons; import java.io.IOException; import java.nio.file.Files; @@ -45,7 +46,7 @@ public DownloadFile downloadDbSnp() throws IOException, InterruptedException { if (!speciesHasInfoToDownload(speciesConfiguration, VARIATION_DATA)) { return null; } - if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { + if (speciesConfiguration.getScientificName().equals(EtlCommons.HOMO_SAPIENS_NAME)) { logger.info("Downloading dbSNP information ..."); Path variation = downloadFolder.resolve(VARIATION_DATA); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java index 56757d5ead..6b3d78ce83 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/SnpMongoDBAdaptor.java @@ -122,7 +122,7 @@ public CellBaseDataResult startsWith(String id, QueryOptions options, int d return new CellBaseDataResult<>(mongoDBCollection.find(regex, projection, CONVERTER, options)); } - public Bson parseQuery(SnpQuery query) { + public Bson parseQuery(SnpQuery query) throws CellBaseException { List andBsonList = new ArrayList<>(); try { for (Map.Entry entry : query.toObjectMap().entrySet()) { @@ -145,7 +145,7 @@ public Bson parseQuery(SnpQuery query) { } } } catch (IllegalAccessException e) { - e.printStackTrace(); + throw new CellBaseException("Error parsing SNP query: " + query, e); } logger.info("SnpMongoDBAdaptor parsed query: {}", andBsonList); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java index 7be8cf75f1..3c33266f7e 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java @@ -305,7 +305,7 @@ public Bson parseQuery(VariantQuery query) throws CellBaseException { } } } catch (IllegalAccessException e) { - throw new CellBaseException(e.getMessage()); + throw new CellBaseException("Error parsing variant query: " + query, e); } logger.debug("variant parsed query: {}", andBsonList); From a0ca427115659532674b8455eb3e99018cc22775 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 30 Apr 2024 10:38:48 +0200 Subject: [PATCH 25/26] Prepare release 5.8.3 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 0ac6807a6d..b561d3cc62 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3-SNAPSHOT + 5.8.3 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 39295305ab..96f8c1d062 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3-SNAPSHOT + 5.8.3 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index aeacc5f42f..cd396d0e1e 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3-SNAPSHOT + 5.8.3 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 1f0cab6002..15351ef02b 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3-SNAPSHOT + 5.8.3 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 805d371305..101a284617 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3-SNAPSHOT + 5.8.3 ../pom.xml diff --git a/pom.xml b/pom.xml index 0d8d1b00a8..e41d354314 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3-SNAPSHOT + 5.8.3 pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 4.12.1-SNAPSHOT - 2.12.2-SNAPSHOT + 4.12.0 + 2.12.2 0.1.0 2.11.4 1.9.13 From 08b0e1d9603319157abdf0ee0ed80f938066d067 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 6 Aug 2024 17:29:39 +0200 Subject: [PATCH 26/26] Prepare Port Patch Cellbase 5.8.3 -> 6.3.0 #TASK-6647 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index b561d3cc62..aed90e9897 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 96f8c1d062..7424c21bbb 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index cd396d0e1e..7c74e13d92 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 15351ef02b..780fc20043 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 101a284617..fe4509c6fc 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3 + 6.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index e41d354314..8454cea683 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.3 + 6.3.0-SNAPSHOT pom CellBase project