-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #222 from SpatialHackathon/main
Sync w main
- Loading branch information
Showing
23 changed files
with
239 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
#!/usr/bin/env python | ||
|
||
# Author_and_contribution: Niklas Mueller-Boetticher; created template | ||
# Author_and_contribution: Florian Heyl (heylf); created code | ||
|
||
import argparse | ||
import os | ||
import tempfile | ||
import requests | ||
import pandas as pd | ||
import scipy | ||
import json | ||
import pandas as pd | ||
import numpy as np | ||
|
||
from scipy.io import mmwrite | ||
from pathlib import Path | ||
|
||
def download_data(url, destination_folder, file_name):
    """Download a single file from *url* into *destination_folder*/*file_name*.

    Parameters
    ----------
    url : str
        Direct download URL (a Zenodo record file link here).
    destination_folder : str
        Directory the file is written to; created if it does not exist.
    file_name : str
        Basename to store the downloaded content under.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status (4xx/5xx).
    """
    print(f'[INFO] Downloading annotated data from {url} and put it into {destination_folder}...')

    # Create the destination folder if it doesn't exist (exist_ok avoids the
    # check-then-create race of the original `os.path.exists` guard).
    os.makedirs(destination_folder, exist_ok=True)

    # Use a distinct local instead of shadowing the `file_name` parameter.
    file_path = os.path.join(destination_folder, file_name)

    # Fail loudly on HTTP errors instead of silently writing the server's
    # error page to disk (the original never checked the status code).
    response = requests.get(url)
    response.raise_for_status()
    with open(file_path, 'wb') as file:
        file.write(response.content)

    print('...done')
|
||
def get_data(out_dir):
    """Download the STARmap+ dataset from Zenodo and write it to *out_dir*
    in the benchmark layout: one directory per sample plus `samples.tsv`
    and `experiment.json` at the top level.

    Per sample the following files are produced:
      coordinates.tsv, labels.tsv, observations.tsv  (from <sample>_spatial.csv)
      counts.mtx, features.tsv                       (from <sample>raw_expression_pd.csv)

    Parameters
    ----------
    out_dir : str
        Output directory; per-sample subdirectories are created inside it.
    """
    # Single source of truth for the Zenodo record (was repeated inline four
    # times in the original).
    base_url = 'https://zenodo.org/records/8327576/files'

    with tempfile.TemporaryDirectory() as tmpdir:
        print('[INFO] created temporary directory', tmpdir)
        print('[START] COMPOSING DATA')

        # Names and urls of the samples are so inconsistent that they have to
        # be listed manually.
        samples = ['well7_5', 'well10', 'well09', 'well04', 'well06', 'well07',
                   'well03', 'well01OB', 'well05', 'sagittal3', 'well01brain',
                   'well1_5', 'well2_5', 'sagittal1', 'spinalcord', 'well11',
                   'sagittal2', 'well3_5', 'well08']

        n_cluster = []    # distinct main molecular cell types per sample
        directories = []  # per-sample output directories (for samples.tsv)

        for sample in samples:
            print(f'[INFO] Get sample {sample}')

            sample_dir = f'{out_dir}/{sample}'
            os.makedirs(sample_dir, exist_ok=True)
            directories.append(sample_dir)

            download_data(f'{base_url}/{sample}_spatial.csv?download=1',
                          tmpdir, f'{sample}_spatial.csv')

            # Write out coordinates.tsv, labels.tsv and observations.tsv.
            with open(f'{tmpdir}/{sample}_spatial.csv', 'r') as f_in, \
                 open(f'{sample_dir}/labels.tsv', 'w') as f_out_labels, \
                 open(f'{sample_dir}/coordinates.tsv', 'w') as f_out_coords, \
                 open(f'{sample_dir}/observations.tsv', 'w') as f_out_obs:

                # First line is the column header, second an annotation row;
                # only the header is reused (for observations.tsv, with the
                # leading 'NAME' label dropped).
                headline_1 = f_in.readline()
                f_in.readline()  # skip the second header line

                f_out_obs.write(headline_1.replace(',', '\t').replace('NAME', ''))

                clusters = []

                f_out_coords.write('\tx\ty\tz\n')
                # Implicit concatenation keeps the header free of stray
                # whitespace (the original's backslash continuation inside the
                # string literal pulled source indentation into the header).
                f_out_labels.write('\tMain_molecular_cell_type'
                                   '\tSub_molecular_cell_type'
                                   '\tMain_molecular_tissue_region'
                                   '\tSub_molecular_tissue_region'
                                   '\tMolecular_spatial_cell_type\n')

                for line in f_in:
                    data = line.strip('\n').split(',')
                    f_out_labels.write(f'{data[0]}\t{data[4]}\t{data[5]}\t{data[6]}\t{data[7]}\t{data[8]}\n')
                    f_out_coords.write(f'{data[0]}\t{data[1]}\t{data[2]}\t{data[3]}\n')
                    f_out_obs.write(line.replace(',', '\t'))
                    clusters.append(data[4])

            # n_clusters = number of distinct main molecular cell types seen.
            n_cluster.append(len(set(clusters)))

            download_data(f'{base_url}/{sample}raw_expression_pd.csv?download=1',
                          tmpdir, f'{sample}raw_expression_pd.csv')

            # Write counts.mtx: transpose to cells x genes, store sparse.
            df = pd.read_table(f'{tmpdir}/{sample}raw_expression_pd.csv', sep=',', index_col=0)
            features = df.index
            df = df.transpose()
            mmwrite(f'{sample_dir}/counts.mtx', scipy.sparse.csr_matrix(df))

            # Write out features.tsv (no gene version is provided upstream).
            with open(f'{sample_dir}/features.tsv', 'w') as f_out_features:
                f_out_features.write('\tgene_version\n')
                for feature in features:
                    f_out_features.write(f'{feature}\tNA\n')

        ## Metadata files
        download_data(f'{base_url}/metadata.csv?download=1', tmpdir, 'metadata.csv')

        tmp_samples = []
        position = []
        patient = []

        with open(f'{tmpdir}/metadata.csv', 'r') as f_in:
            f_in.readline()
            f_in.readline()
            for line in f_in:
                data = line.strip('\n').split(',')
                # Metadata rows are per cell ("<sample>_<cell>"); keep one
                # entry per sample, in first-seen order.
                sample_name = '_'.join(data[0].split('_')[:-1])
                if sample_name not in tmp_samples:
                    tmp_samples.append(sample_name)
                    position.append(data[8])
                    patient.append(data[2])

        # Reorder the metadata columns to match the manual sample list above.
        sort_metadata = [tmp_samples.index(x) for x in samples]
        position = np.array(position)[sort_metadata]
        patient = np.array(patient)[sort_metadata]

        samples_df = pd.DataFrame({'patient': patient, 'sample': samples,
                                   'position': position,
                                   'replicate': ['NA'] * len(samples),
                                   'directory': directories,
                                   'n_clusters': n_cluster})
        samples_df.loc[
            :, ['patient', 'sample', 'position', 'replicate', 'directory', 'n_clusters']
        ].to_csv(f'{out_dir}/samples.tsv', sep='\t', index_label='')

        with open(f'{out_dir}/experiment.json', 'w') as f:
            exp_info = {'technology': 'STARmap+'}
            json.dump(exp_info, f)

    print('[FINISH]')
||
def main():
    """Parse command-line arguments and download the STARmap+ dataset."""
    # Implicit string concatenation instead of backslash continuations inside
    # the literal: the continuations embedded long runs of source indentation
    # into the --help text shown to the user.
    parser = argparse.ArgumentParser(
        description="Load data for STARmap+ dataset. This dataset contains "
                    "spatial gene expression profiles of 1,022 genes mapped "
                    "in 3D at a voxel size of 194 X 194 X 345 nm3 in 1.09 "
                    "million high-quality cells in the mouse CNS.")

    # Add arguments for input and output folders
    parser.add_argument('-o', '--out_dir',
                        help="Output directory to write files to.",
                        required=True)

    # Parse the command-line arguments
    args = parser.parse_args()
    print(args)
    print(args.out_dir)
    get_data(args.out_dir)


if __name__ == '__main__':
    main()
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
channels: | ||
- conda-forge | ||
dependencies: | ||
- gdown=5.1.0 | ||
- pandas=2.2.0 | ||
- requests=2.31.0 | ||
- numpy=1.26.4 | ||
- python=3.12.2 | ||
- scipy=1.12.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
{ | ||
"matrix": "counts", | ||
"integrated_feature_selection": false, | ||
"matrix": "transform", | ||
"integrated_feature_selection": true, | ||
"image": false, | ||
"neighbors": false, | ||
"neighbors": true, | ||
"config_file": true | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.