Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-6647 - Fix Port Patch 1.10.4 -> 2.2.1 develop #706

Merged
merged 31 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d4416aa
Prepare next release 5.8.3-SNAPSHOT
juanfeSanahuja Feb 6, 2024
e49d994
Implement dbSNP file download as 'variation' TASK-5794
imedina Mar 11, 2024
26a008a
Implement dbSNP file download as 'variation' TASK-5794
imedina Mar 11, 2024
a267dc3
Implement VariationBuilder TASK-5794
imedina Mar 11, 2024
c583edd
lib: minor changes when downloading dbSNP data, #TASK-5815, #TASK-5789
jtarraga Mar 11, 2024
b75b1f7
lib: some improvements in downloading dbSNP data, #TASK-5816, #TASK-5789
jtarraga Mar 11, 2024
4e68dab
lib: load dbSNP data in the CellBase MongoDB collection 'snp', #TASK-…
jtarraga Mar 11, 2024
2d6ca96
lib: implement SnpMongoDBAdaptor, #TASK-5794, TASK-5789
jtarraga Mar 11, 2024
56b3eb6
server: add endpoint 'snp' and update endpoints variant/search varian…
jtarraga Mar 11, 2024
82e57f3
Several improvements
imedina Mar 12, 2024
bbccffe
Final dbSNP builder implemented
imedina Mar 12, 2024
692cad2
lib: update VariantMongoDBAdaptor according to the SNP biodata change…
jtarraga Mar 12, 2024
90a4f68
lib: fix NPE, #TASK-5816, #TASK-5789
jtarraga Mar 12, 2024
36c64fe
server: add the endpoints variant/snp/search and variant/snp/startsWi…
jtarraga Mar 13, 2024
7670702
server: update variant annotator to return the dbSNP IDs in the field…
jtarraga Mar 13, 2024
f206fe5
lib: the variant annotation calculator search SNP ids only if the col…
jtarraga Mar 13, 2024
2dbd3b4
lib: normalize variants created by SNPs before searching in the varia…
jtarraga Mar 15, 2024
0b173e9
Fix some Sonar issues, #TASK-5789
jtarraga Mar 15, 2024
e5bfdab
server: fix endpoint /versions by returning the sources from the inpu…
jtarraga Mar 28, 2024
94880c1
core: fix typo, #TASK-5704
jtarraga Mar 28, 2024
3531ecc
Merge pull request #688 from opencb/TASK-5704
jtarraga Apr 5, 2024
ba59eaa
Merge branch 'release-5.8.x' into TASK-5789
jtarraga Apr 9, 2024
8e5f1b4
client: update VariantClient to take into account the dbSNP endpoints…
jtarraga Apr 12, 2024
4cf6541
Merge branch 'TASK-5789' of https://github.com/opencb/cellbase into T…
jtarraga Apr 12, 2024
377c0b7
client: use JUnit 5, #TASK-6020, #TASK-5789
jtarraga Apr 12, 2024
1777ca2
lib: check and remove duplicated xrefs/dbSNP, #TASK-5821, #TASK-5789
jtarraga Apr 16, 2024
30f167c
test: improve JUnit tests, and exception management, #TASK-6020, #TAS…
jtarraga Apr 17, 2024
a65e082
Merge pull request #689 from opencb/TASK-5789
jtarraga Apr 17, 2024
a0ca427
Prepare release 5.8.3
juanfeSanahuja Apr 30, 2024
08b0e1d
Prepare Port Patch Cellbase 5.8.3 -> 6.3.0 #TASK-6647
juanfeSanahuja Aug 6, 2024
dee7972
Merge branch 'develop' into TASK-6647-dev
juanfeSanahuja Aug 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.beust.jcommander.*;
import org.opencb.cellbase.app.cli.CliOptionsParser;
import org.opencb.cellbase.core.api.key.ApiKeyQuota;
import org.opencb.cellbase.lib.EtlCommons;

import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -74,6 +75,7 @@ public AdminCliOptionsParser() {
jCommander.addCommand("validate", validationCommandOptions);
}

/**
 * Parses the command-line arguments by handing them to the JCommander instance of this parser.
 *
 * @param args raw command-line arguments
 * @throws ParameterException declared so that parsing failures reach the caller
 */
@Override
public void parse(String[] args) throws ParameterException {
    this.jCommander.parse(args);
}
Expand All @@ -87,9 +89,13 @@ public class DownloadCommandOptions {
@ParametersDelegate
public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions;

@Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download: genome, gene, "
+ "variation, variation_functional_score, regulation, protein, conservation, "
+ "clinical_variants, repeats, svs, pubmed and 'all' to download everything", required = true, arity = 1)
@Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download:"
+ EtlCommons.GENOME_DATA + ", " + EtlCommons.GENE_DATA + ", " + EtlCommons.VARIATION_DATA + ", "
+ EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA + ", " + EtlCommons.MISSENSE_VARIATION_SCORE_DATA + ", "
+ EtlCommons.REGULATION_DATA + ", " + EtlCommons.PROTEIN_DATA + ", " + EtlCommons.CONSERVATION_DATA + ", "
+ EtlCommons.CLINICAL_VARIANTS_DATA + ", " + EtlCommons.REPEATS_DATA + ", " + EtlCommons.OBO_DATA + ", "
+ EtlCommons.PUBMED_DATA + ", " + EtlCommons.PHARMACOGENOMICS_DATA + "; and 'all' to download everything",
required = true, arity = 1)
public String data;

@Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import java.util.Collections;
import java.util.List;

import static org.opencb.cellbase.lib.EtlCommons.PHARMGKB_DATA;
import static org.opencb.cellbase.lib.EtlCommons.*;

/**
* Created by imedina on 03/02/15.
Expand Down Expand Up @@ -132,6 +132,9 @@ public void execute() {
case EtlCommons.REFSEQ_DATA:
parser = buildRefSeq();
break;
case EtlCommons.VARIATION_DATA:
parser = buildVariation();
break;
case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA:
parser = buildCadd();
break;
Expand Down Expand Up @@ -275,6 +278,21 @@ private CellBaseBuilder buildRefSeq() {
return new RefSeqGeneBuilder(refseqFolderPath, speciesConfiguration, serializer);
}

/**
 * Prepares the build of the variation data, which currently consists of dbSNP only.
 *
 * Ensures the variation build folder exists, copies the dbSNP version file from the download
 * folder into it (replacing any previous copy) and returns the builder that will process the
 * downloaded data.
 *
 * @return a {@code VariationBuilder} reading from the variation download folder and writing
 *         through a JSON serializer bound to the variation build folder
 * @throws IOException if the build folder cannot be created or the version file cannot be copied
 */
private CellBaseBuilder buildVariation() throws IOException {
    Path downloadVariationPath = downloadFolder.resolve(VARIATION_DATA);
    Path buildVariationPath = buildFolder.resolve(VARIATION_DATA);

    // Unlike File.mkdirs(), whose boolean result was previously ignored, createDirectories
    // is a no-op for an existing directory and reports any failure as an IOException
    Files.createDirectories(buildVariationPath);

    CellBaseFileSerializer variationSerializer = new CellBaseJsonFileSerializer(buildVariationPath);

    // Currently, only dbSNP data
    Files.copy(downloadVariationPath.resolve(DBSNP_VERSION_FILENAME), buildVariationPath.resolve(DBSNP_VERSION_FILENAME),
            StandardCopyOption.REPLACE_EXISTING);
    return new VariationBuilder(downloadVariationPath, variationSerializer, configuration);
}

private CellBaseBuilder buildCadd() {
Path variationFunctionalScorePath = downloadFolder.resolve("variation_functional_score");
copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("caddVersion.json")));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ public void execute() {
case EtlCommons.GENE_DATA:
downloadFiles.addAll(downloader.downloadGene());
break;
// case EtlCommons.VARIATION_DATA:
// downloadManager.downloadVariation();
// break;
case EtlCommons.VARIATION_DATA:
downloadFiles.addAll(downloader.downloadVariation());
break;
case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA:
downloadFiles.addAll(downloader.downloadCaddScores());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import java.util.List;
import java.util.concurrent.ExecutionException;

import static org.opencb.cellbase.lib.EtlCommons.*;

/**
* Created by imedina on 03/02/15.
*/
Expand Down Expand Up @@ -372,30 +374,57 @@ private void checkParameters() throws CellBaseException {
/**
 * Loads the variation data located in the folder {@code input.resolve(VARIATION_DATA)}.
 *
 * When {@code field} is null, every file whose name starts with "variation_chr" is loaded into the
 * "variation" collection; the index creation and the data release update follow. Otherwise a custom
 * load is run with {@code field} and {@code innerFields}. Afterwards the dbSNP file
 * ({@code DBSNP_NAME + ".json.gz"}) is loaded into {@code SNP_COLLECTION_NAME}, but only when both
 * that file and the version file {@code DBSNP_VERSION_FILENAME} exist in the variation folder.
 *
 * NOTE(review): this span is rendered from a diff without +/- markers, so superseded lines (reading
 * from {@code input}) and their replacements (reading from {@code variationPath}) appear side by side.
 */
private void loadVariationData() throws NoSuchMethodException, InterruptedException, ExecutionException,
InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException,
IOException, LoaderException, CellBaseException {
Path variationPath = input.resolve(VARIATION_DATA);
// First load data
// Common loading process from CellBase variation data models
if (field == null) {
// NOTE(review): no close() of the DirectoryStream is visible here; consider try-with-resources
DirectoryStream<Path> stream = Files.newDirectoryStream(input,
// Common loading process from CellBase variation data models
DirectoryStream<Path> stream = Files.newDirectoryStream(variationPath,
entry -> entry.getFileName().toString().startsWith("variation_chr"));

// Counts the loaded files so that index and release are only touched when something was loaded
int numLoadings = 0;
for (Path entry : stream) {
logger.info("Loading file '{}'", entry);
loadRunner.load(input.resolve(entry.getFileName()), "variation", dataRelease);
loadRunner.load(variationPath.resolve(entry.getFileName()), "variation", dataRelease);
numLoadings++;
}

// Create index
createIndex("variation");

// Update release (collection and sources)
List<Path> sources = new ArrayList<>(Arrays.asList(
input.resolve("ensemblVariationVersion.json")
));
dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources);
if (numLoadings > 0) {
// Create index
createIndex("variation");

// Update release (collection and sources)
List<Path> sources = new ArrayList<>(Arrays.asList(
variationPath.resolve("ensemblVariationVersion.json")
));
dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources);
} else {
// NOTE(review): the message presumably means "No variation file ... found" - confirm wording
logger.info("Any variation file 'variation_chr...' found within folder '{}'", variationPath);
}
} else {
// Custom update required e.g. population freqs loading
logger.info("Loading file '{}'", variationPath);
loadRunner.load(variationPath, "variation", dataRelease, field, innerFields);
}

// Load dbSNP
Path dbSnpFilePath = variationPath.resolve(DBSNP_NAME + ".json.gz");
if (dbSnpFilePath.toFile().exists()) {
// The version file is required because it is the source registered in the data release below
if (variationPath.resolve(DBSNP_VERSION_FILENAME).toFile().exists()) {
logger.info("Loading dbSNP file '{}'", dbSnpFilePath);
loadRunner.load(dbSnpFilePath, SNP_COLLECTION_NAME, dataRelease);

// Create index
createIndex(SNP_COLLECTION_NAME);

// Update release (collection and sources)
List<Path> sources = Collections.singletonList(variationPath.resolve(DBSNP_VERSION_FILENAME));
dataReleaseManager.update(dataRelease, SNP_COLLECTION_NAME, EtlCommons.VARIATION_DATA, sources);
} else {
logger.warn("In order to load the dbSNP file you need the version file {} within the folder '{}'", DBSNP_VERSION_FILENAME,
variationPath);
}
} else {
logger.info("Loading file '{}'", input);
loadRunner.load(input, "variation", dataRelease, field, innerFields);
// NOTE(review): the message presumably means "No dbSNP file found" - confirm wording
logger.warn("Any dbSNP file found within the folder '{}'", variationPath);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.opencb.cellbase.client.rest;

import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Snp;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.cellbase.client.config.ClientConfiguration;
Expand Down Expand Up @@ -236,6 +237,14 @@ public CellBaseDataResponse<String> getAllConsequenceTypes(Query query) throws I
return execute("consequenceTypes", query, new QueryOptions(), String.class);
}

/**
 * Runs a query against the "snp/search" resource and maps the results to {@link Snp} objects.
 *
 * @param query   filters sent with the request
 * @param options query options sent with the request
 * @return the response holding the matching {@link Snp} entries
 * @throws IOException declared by the underlying request execution
 */
public CellBaseDataResponse<Snp> searchSnp(Query query, QueryOptions options) throws IOException {
    final String resource = "snp/search";
    return execute(resource, query, options, Snp.class);
}

/**
 * Runs a query against the "snp/startsWith" resource and maps the results to {@link Snp} objects.
 *
 * @param query   filters sent with the request
 * @param options query options sent with the request
 * @return the response holding the matching {@link Snp} entries
 * @throws IOException declared by the underlying request execution
 */
public CellBaseDataResponse<Snp> startsWithSnp(Query query, QueryOptions options) throws IOException {
    final String resource = "snp/startsWith";
    return execute(resource, query, options, Snp.class);
}

// public CellBaseDataResponse<String> getConsequenceTypeById(String id, QueryOptions options) throws IOException {
// return execute(id, "consequence_type", options, String.class);
// }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,26 @@

import org.apache.avro.specific.SpecificRecordBase;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ArgumentsSource;
import org.opencb.biodata.models.core.Snp;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.ConsequenceType;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.cellbase.client.config.ClientConfiguration;
import org.opencb.cellbase.client.config.RestConfig;
import org.opencb.cellbase.core.common.GitRepositoryState;
import org.opencb.cellbase.core.models.DataRelease;
import org.opencb.cellbase.core.result.CellBaseDataResponse;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.VersionUtils;

import java.util.*;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -139,6 +149,92 @@ public void getAllConsequenceTypes(CellBaseClient cellBaseClient) throws Excepti
assertNotNull(response.firstResult(), "List of all the consequence types present should be returned");
}

/**
 * Searches two dbSNP IDs through the SNP search endpoint and expects exactly those two entries back,
 * in the requested order. The test is skipped when the server version or the data release assumed
 * below is not available.
 */
@Test
public void testSearchSnpBydbSnpId() throws Exception {
    final int dataRelease = 7;

    ClientConfiguration config = new ClientConfiguration()
            .setDefaultSpecies("hsapiens")
            .setVersion("v5.8.3-SNAPSHOT")
            .setRest(new RestConfig(Collections.singletonList("https://ws.zettagenomics.com/cellbase"), 2000));
    CellBaseClient client = new CellBaseClient(config);

    // Assumptions before running the test: minimum server version and presence of the data release
    ObjectMap about = client.getMetaClient().about().firstResult();
    String serverVersion = about.getString("Version");
    Assumptions.assumeTrue(VersionUtils.isMinVersion("5.8.3-SNAPSHOT", serverVersion));

    CellBaseDataResponse<DataRelease> dataReleaseResponse = client.getMetaClient().dataReleases();
    boolean releaseAvailable = dataReleaseResponse.getResponses().get(0).getResults().stream()
            .map(DataRelease::getRelease)
            .anyMatch(Integer.valueOf(dataRelease)::equals);
    Assumptions.assumeTrue(releaseAvailable);

    Query query = new Query();
    query.put("id", "rs1570391602,rs41278952");
    query.put("dataRelease", dataRelease);

    CellBaseDataResponse<Snp> response = client.getVariantClient().searchSnp(query, new QueryOptions());
    assertEquals(2, response.getResponses().get(0).getNumResults());

    List<Snp> snps = response.getResponses().get(0).getResults();
    assertEquals("rs1570391602", snps.get(0).getId());
    assertEquals("rs41278952", snps.get(1).getId());
}

/**
 * Searches a SNP by chromosome, position and reference allele and checks that the single entry
 * returned carries the expected ID, coordinates and alternate allele. The test is skipped when the
 * server version or the data release assumed below is not available.
 */
@Test
public void testSearchSnpByPosition() throws Exception {
    final int dataRelease = 7;

    ClientConfiguration config = new ClientConfiguration()
            .setDefaultSpecies("hsapiens")
            .setVersion("v5.8.3-SNAPSHOT")
            .setRest(new RestConfig(Collections.singletonList("https://ws.zettagenomics.com/cellbase"), 2000));
    CellBaseClient client = new CellBaseClient(config);

    // Assumptions before running the test: minimum server version and presence of the data release
    ObjectMap about = client.getMetaClient().about().firstResult();
    String serverVersion = about.getString("Version");
    Assumptions.assumeTrue(VersionUtils.isMinVersion("5.8.3-SNAPSHOT", serverVersion));

    CellBaseDataResponse<DataRelease> dataReleaseResponse = client.getMetaClient().dataReleases();
    boolean releaseAvailable = dataReleaseResponse.getResponses().get(0).getResults().stream()
            .map(DataRelease::getRelease)
            .anyMatch(Integer.valueOf(dataRelease)::equals);
    Assumptions.assumeTrue(releaseAvailable);

    Query query = new Query();
    query.put("chromosome", "1");
    query.put("position", "56948509");
    query.put("reference", "T");
    query.put("dataRelease", dataRelease);

    CellBaseDataResponse<Snp> response = client.getVariantClient().searchSnp(query, new QueryOptions());
    assertEquals(1, response.getResponses().get(0).getNumResults());

    Snp snp = response.getResponses().get(0).getResults().get(0);
    assertEquals("rs1570391602", snp.getId());
    assertEquals(query.getInt("position"), snp.getPosition());
    assertEquals(query.get("reference"), snp.getReference());
    assertEquals(1, snp.getAlternates().size());
    assertEquals("G", snp.getAlternates().get(0));
}

/**
 * Queries the SNP startsWith endpoint with an ID prefix and checks both the number of entries
 * returned and that every returned ID begins with that prefix. The test is skipped when the server
 * version or the data release assumed below is not available.
 */
@Test
public void testStarsWithSnp() throws Exception {
    final int dataRelease = 7;

    ClientConfiguration config = new ClientConfiguration()
            .setDefaultSpecies("hsapiens")
            .setVersion("v5.8.3-SNAPSHOT")
            .setRest(new RestConfig(Collections.singletonList("https://ws.zettagenomics.com/cellbase"), 2000));
    CellBaseClient client = new CellBaseClient(config);

    // Assumptions before running the test: minimum server version and presence of the data release
    ObjectMap about = client.getMetaClient().about().firstResult();
    String serverVersion = about.getString("Version");
    Assumptions.assumeTrue(VersionUtils.isMinVersion("5.8.3-SNAPSHOT", serverVersion));

    CellBaseDataResponse<DataRelease> dataReleaseResponse = client.getMetaClient().dataReleases();
    boolean releaseAvailable = dataReleaseResponse.getResponses().get(0).getResults().stream()
            .map(DataRelease::getRelease)
            .anyMatch(Integer.valueOf(dataRelease)::equals);
    Assumptions.assumeTrue(releaseAvailable);

    Query query = new Query();
    query.put("id", "rs157039161");
    query.put("dataRelease", dataRelease);

    CellBaseDataResponse<Snp> response = client.getVariantClient().startsWithSnp(query, new QueryOptions());
    assertEquals(9, response.getResponses().get(0).getNumResults());

    // Every returned ID must begin with the queried prefix
    List<Snp> snps = response.getResponses().get(0).getResults();
    for (Snp snp : snps) {
        boolean matchesPrefix = snp.getId().startsWith(query.getString("id"));
        if (!matchesPrefix) {
            fail();
        }
    }
}

// @Test
// public void getConsequenceTypeById() throws Exception {
// CellBaseDataResponse<String> stringCellBaseDataResponse = cellBaseClient.getVariantClient().getConsequenceTypeById("22:35490160:G:A", null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ public Type type() {
public static final String VERSION_DESCRIPTION = "API version, e.g.: " + DEFAULT_VERSION;

public static final String DATA_RELEASE_PARAM = "dataRelease";
public static final String DATA_RELEASE_DESCRIPTION = "Data release. To use the default data release, set this to 0. To get the list"
+ " of available data release, please call the endpoint 'meta/dataReleases'";
public static final String DATA_RELEASE_DESCRIPTION = "Data release. To get the list of available data releases, please call the"
+ " endpoint 'meta/dataReleases'";

public static final String API_KEY_PARAM = "apiKey";
public static final String API_KEY_DESCRIPTION = "API key to allow access to licensed/restricted data sources such as"
Expand Down
Loading
Loading