# neuron_categories.py
# Builds a CSV of category labels (cell type, species, region, lab) for the
# neuron files under the datasets directory, and can filter that CSV.
import os
import pandas as pd
# CSV written by write_categories(): one row per neuron with its category labels.
CATEGORIES_FILE = '/iblsn/data/Arjun/neurons/neuron_categories/neuron_categories.csv'
# CSV written by filter_categories(): the rows of CATEGORIES_FILE that survive filtering.
CATEGORIES_FILE_FILTERED = '/iblsn/data/Arjun/neurons/neuron_categories/neuron_categories_filtered.csv'
# Root of the dataset directory tree scanned by write_categories().
DATASETS_DIR = '/iblsn/data/Arjun/neurons/datasets'
def remove_sep(string, sep='_'):
    """Return *string* with every occurrence of *sep* swapped for a space.

    The separator defaults to an underscore, so ``'dentate_gyrus'`` becomes
    ``'dentate gyrus'``.
    """
    spaced = string.replace(sep, ' ')
    return spaced
def add_count_col(df, categories):
    """Count how many rows of *df* fall into each group of *categories*.

    Args:
        df: pandas DataFrame to summarise.
        categories: column label, or list of column labels, to group by.

    Returns:
        A new DataFrame with one row per distinct group: the grouping
        column(s) followed by a ``count`` column holding the group size.
    """
    group_sizes = df.groupby(categories).size()
    return group_sizes.reset_index(name='count')
def remove_commas(string):
    """Return *string* with all comma characters deleted."""
    pieces = string.split(',')
    return ''.join(pieces)
def _list_subdirs(path):
    """Return the names of the entries of *path* that are directories."""
    return [entry for entry in os.listdir(path)
            if os.path.isdir(os.path.join(path, entry))]


def write_categories():
    """Scan DATASETS_DIR and write one CSV row per neuron to CATEGORIES_FILE.

    The dataset tree is walked as
    ``<cell type>/<species>/<region>/<lab>/<neuron>.CNG.swc``.  For every
    file ending in ``.CNG.swc`` a row ``name, cell type, species, region,
    lab`` is written, where the four labels are the lower-cased directory
    names with commas removed and underscores turned into spaces.  Each
    neuron name is also printed as it is processed.

    Non-directory entries at the intermediate levels are skipped instead of
    making ``os.listdir`` fail, and files without the ``.CNG.swc`` suffix
    are ignored.
    """
    suffix = '.CNG.swc'
    with open(CATEGORIES_FILE, 'w') as f:
        f.write('neuron name, cell type, species, region, lab\n')
        for cell_type in _list_subdirs(DATASETS_DIR):
            cell_type_dir = os.path.join(DATASETS_DIR, cell_type)
            for species in _list_subdirs(cell_type_dir):
                species_dir = os.path.join(cell_type_dir, species)
                for region in _list_subdirs(species_dir):
                    region_dir = os.path.join(species_dir, region)
                    for lab in _list_subdirs(region_dir):
                        lab_dir = os.path.join(region_dir, lab)
                        # The labels depend only on the directory names, so
                        # clean them once per lab rather than once per file.
                        labels = [
                            remove_sep(remove_commas(label.lower()))
                            for label in (cell_type, species, region, lab)
                        ]
                        for neuron_file in os.listdir(lab_dir):
                            if not neuron_file.endswith(suffix):
                                continue
                            name = neuron_file[:-len(suffix)]
                            # Single-argument print() behaves the same on
                            # Python 2 and Python 3.
                            print(name)
                            f.write('%s\n' % ', '.join([name] + labels))
def filter_categories():
    """Keep, for each neuron, only the rows carrying its most common labels.

    Reads CATEGORIES_FILE, replaces underscores with spaces in the four
    category columns and drops duplicate rows.  Then, for each category in
    turn (cell type, species, region, lab):

    1. count how often every label of that category occurs in the table;
    2. for each neuron name, pick the label with the highest count among
       that neuron's rows (ties go to the label that compares greatest);
    3. discard the rows whose label differs from the neuron's pick.

    The surviving rows are written to CATEGORIES_FILE_FILTERED and the
    number of rows per cell type is printed.
    """
    categories = ['cell type', 'species', 'region', 'lab']

    df = pd.read_csv(CATEGORIES_FILE, skipinitialspace=True)
    for category in categories:
        df[category] = df[category].apply(remove_sep)
    df = df.drop_duplicates()

    for category in categories:
        # Attach to every row how often its label occurs in the whole table;
        # the merge joins on the shared category column.
        counted = pd.merge(df, add_count_col(df, category))

        max_vals = {}
        for name, group in counted.groupby('neuron name'):
            # max() over (count, label) pairs selects the most frequent
            # label, falling back to label order when counts are equal.
            # NOTE: a special case that would have ignored the
            # 'principal cell' label for 'cell type' was never active (its
            # result was unused), so no such rule is applied here.
            _, max_val = max(zip(group['count'], group[category]))
            max_vals[name] = max_val

        # Compare against the mapped Series directly instead of adding and
        # then dropping a temporary column on a filtered frame.
        keep = df[category] == df['neuron name'].map(max_vals)
        df = df[keep]

    df.to_csv(CATEGORIES_FILE_FILTERED, index=False)
    for name, group in df.groupby('cell type'):
        # %-formatting gives the same "name count" output on Python 2 and 3.
        print('%s %s' % (name, len(group.index)))
def main():
    """Script entry point: regenerate the raw neuron categories CSV.

    Only the directory scan is run; the filtering step is currently
    disabled.
    """
    write_categories()
    # filter_categories()
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()