bring up to date #3

Open · wants to merge 48 commits into base: master

Commits (48)
6ff5783
make fair comparison between recurrent and image approach
Jul 3, 2017
dac58d4
skip first epochs for loss
jkiesele Aug 8, 2017
ec8e2a5
introduced new loss: logcosh - very similar to Huber loss. Added poss…
jkiesele Aug 10, 2017
30f7724
registered loss in global list
jkiesele Aug 10, 2017
7abb390
added logcosh loss for single number
jkiesele Aug 10, 2017
8f98737
some plotting improvements and bugfixes. basis for random selection i…
jkiesele Aug 14, 2017
564fd63
comparison script
jkiesele Aug 25, 2017
0dc2a87
small scripts and major performance and stability update for core fra…
jkiesele Sep 5, 2017
3830db3
changes needed for incremental studies
Sep 6, 2017
792c697
merge conflicts
Sep 6, 2017
576e492
significant core improvements
jkiesele Sep 8, 2017
a01cdc6
core developments
jkiesele Sep 9, 2017
9474a7c
min preread of 2 files
jkiesele Sep 9, 2017
4830caa
MAJOR: Finally resuming a training works. There is a problem in the o…
jkiesele Sep 10, 2017
5972627
updated HDD tuning
Sep 11, 2017
1bfd4d7
bugfix
Sep 11, 2017
841708d
some fixes in reading and better error messages
jkiesele Sep 14, 2017
d7f3c4a
deepCSV example
jkiesele Sep 14, 2017
fea9009
some small performance improvements, better pick up of training where…
jkiesele Sep 21, 2017
051dc94
small adjustments
jkiesele Sep 21, 2017
94e938f
significant speed-up of conversion
jkiesele Sep 21, 2017
d9ad64d
more performance increases
jkiesele Sep 22, 2017
0ca3886
small adjustments, reintroduce random shuffle in generator
jkiesele Sep 22, 2017
de845a7
small bugfix
jkiesele Sep 26, 2017
7e7f92b
some locking cleanup
jkiesele Sep 26, 2017
0bb8efe
some performance improvements... still some to do
jkiesele Oct 6, 2017
209ff76
some fixes
jkiesele Oct 8, 2017
7880918
new file IO system. Significantly improved performance
jkiesele Oct 8, 2017
c93b0c2
speed-up of conversion.
jkiesele Oct 9, 2017
56cc348
small bugfix
jkiesele Oct 9, 2017
0206f84
updates
jkiesele Nov 17, 2017
74fb886
Merge branch 'master' of https://github.com/mstoye/DeepJet
Dec 4, 2017
c7f4add
Merge remote-tracking branch 'loc/incremental_studies' into increment…
Dec 4, 2017
a5252dc
more usable roc output
jkiesele Dec 4, 2017
0676b06
snapshot
jkiesele Dec 6, 2017
734a237
automatic numbering of output lists
jkiesele Dec 6, 2017
7905140
Merge pull request #25 from mverzett/master
Dec 18, 2017
f525549
added a no-puppi DeepFlavour version
Dec 19, 2017
0fa26cc
Merge branch 'master' of https://github.com/mstoye/DeepJet into incre…
Dec 19, 2017
9d92fab
larger batch size
Jan 23, 2018
b84daaa
update deepCSV to newer workflow
Jan 23, 2018
2db12d2
Domain adaptation: first sets of commits
Jan 23, 2018
73af1df
yet another update
Jan 23, 2018
5a407d3
bugfixes and modification for the DomainAdaptation scripts
Jan 30, 2018
49239dc
domain adaptation losses and training scripts
Jan 30, 2018
360cd20
Merge pull request #26 from mverzett/master
jkiesele Jan 30, 2018
f318add
gradient reversal layer
Jan 31, 2018
cb1aa95
Merge pull request #27 from mverzett/master
Jan 31, 2018
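The final commit (f318add, "gradient reversal layer") is the key ingredient of the domain-adaptation work merged in this PR. As a rough sketch of the technique, not necessarily the PR's exact implementation: a common Keras construction (assuming a TensorFlow backend; the class name GradientReversal and the hp_lambda parameter are illustrative) uses tf.stop_gradient so that the layer is the identity in the forward pass but multiplies the gradient by -hp_lambda in the backward pass, as in Ganin & Lempitsky's domain-adversarial training:

import tensorflow as tf
from keras.layers import Layer

class GradientReversal(Layer):
    # identity in the forward pass; gradient scaled by -hp_lambda backward
    def __init__(self, hp_lambda=1.0, **kwargs):
        super(GradientReversal, self).__init__(**kwargs)
        self.hp_lambda = hp_lambda

    def call(self, x, mask=None):
        # forward value: (1 + l)*x - l*x = x
        # gradient:      0 - l = -l, because stop_gradient blocks
        # backpropagation through its argument
        return tf.stop_gradient((1. + self.hp_lambda) * x) - self.hp_lambda * x

    def get_config(self):
        config = super(GradientReversal, self).get_config()
        config['hp_lambda'] = self.hp_lambda
        return config

Placed between the shared feature layers and the domain classifier, the reversed gradient pushes the features toward domain invariance while the domain classifier itself is trained normally.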
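For the Aug 10, 2017 commits that introduce the log-cosh loss: log(cosh(x)) grows like x^2/2 for small residuals and like |x| - log(2) for large ones, which is why the commit message calls it "very similar to Huber loss" (with the bonus of being smooth everywhere). A minimal Keras-style sketch follows; the function name loss_logcosh, the softplus identity, and the global_loss_list registration are illustrative assumptions, not the repository's exact code:

import numpy as np
import keras.backend as K

def loss_logcosh(y_true, y_pred):
    x = y_pred - y_true
    # numerically stable identity: log(cosh(x)) = x + softplus(-2x) - log(2)
    return K.mean(x + K.softplus(-2. * x) - np.log(2.), axis=-1)

# the "registered loss in global list" commit suggests custom losses are
# resolved by name when models are reloaded; the dict name is an assumption
global_loss_list = {'loss_logcosh': loss_logcosh}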
4 changes: 4 additions & 0 deletions .gitignore
@@ -12,3 +12,7 @@ __pycache__
*.png
*.eps
*.bak
*.h5
*.csv
*.root
*.json
Binary file removed Analysis/test.pdf
Binary file not shown.
196 changes: 196 additions & 0 deletions Train/Plotting/Jan/compareTwoModels.py
@@ -0,0 +1,196 @@


from argparse import ArgumentParser

parser = ArgumentParser('make a set of ROC curves, comparing two trainings')
parser.add_argument('inputDirA')
parser.add_argument('inputDirB')
parser.add_argument('outputDir')
args = parser.parse_args()


outdir=args.outputDir+'/'


from testing import makeROCs_async, makePlots_async, testDescriptor
#from keras.models import load_model
from DataCollection import DataCollection

import os
os.system('mkdir -p '+outdir)

trainings=[args.inputDirA,
args.inputDirB]


trainings.extend(trainings)
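# the duplication above makes each training appear twice in the file lists,
# matching the doubled veto sets handed to makeROCs_async further down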

filesttbar=[]
for t in trainings:
    filesttbar.append(t+'/ttbar/tree_association.txt')

filesqcd=[]
for t in trainings:
    filesqcd.append(t+'/qcd_600_800/tree_association.txt')


legend=['standard','p_{T} cut']

btruth='isB+isBB+isGBB+isLeptonicB+isLeptonicB_C'
ctruth='isC+isCC+isGCC'
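# (the summed flags are presumably evaluated as ROOT TTree::Draw-style
# expressions: a jet passes a truth definition when the sum is non-zero)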


bprob=[ 'prob_isB+prob_isBB+prob_isLeptB',
'prob_isB+prob_isBB+prob_isLeptB',

'prob_isB+prob_isBB+prob_isLeptB',
'prob_isB+prob_isBB+prob_isLeptB',
]

cprob=[ 'prob_isC',
'prob_isC',

'prob_isC',
'prob_isC']

usdprob=['prob_isUDS',
'prob_isUDS',

'prob_isUDS',
'prob_isUDS',]




print('creating ROCs')

#makeROCs_async(intextfile,
# name_list,
# probabilities_list,
# truths_list,
# vetos_list,
# colors_list,
# outpdffile,
# cuts,
# cmsstyle,
# firstcomment,
# secondcomment,
# extralegend=None,
# logY=True)




for ptcut in ['30','150']:

    makeROCs_async(intextfile=filesttbar,
        name_list=legend,
        probabilities_list=bprob,
        truths_list=btruth,
        vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*['isC'],
        colors_list='auto',
        outpdffile=outdir+"btag_pt"+ptcut+".pdf",
        cuts='jet_pt>'+ptcut,
        cmsstyle=True,
        firstcomment='t#bar{t} events',
        secondcomment='jet p_{T} > '+ptcut+' GeV',
        extralegend=None,
        logY=True,
        individual=True
        )

    makeROCs_async(intextfile=filesttbar,
        name_list=legend,
        probabilities_list=cprob,
        truths_list=ctruth,
        vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*[btruth],
        colors_list='auto',
        outpdffile=outdir+"ctag_pt"+ptcut+".pdf",
        cuts='jet_pt>'+ptcut,
        cmsstyle=True,
        firstcomment='t#bar{t} events',
        secondcomment='jet p_{T} > '+ptcut+' GeV',
        extralegend=['solid?udsg','dashed?b'],
        logY=True,
        individual=True)

    makeROCs_async(intextfile=filesttbar,
        name_list=legend,
        probabilities_list=usdprob,
        truths_list='isUD+isS',
        vetos_list=len(legend)*['isG']+len(legend)*['isB+isLeptonicB+isLeptonicB_C+isC'],
        colors_list='auto',
        outpdffile=outdir+"gtag_pt"+ptcut+".pdf",
        cuts='jet_pt>'+ptcut,
        cmsstyle=True,
        firstcomment='t#bar{t} events',
        secondcomment='jet p_{T} > '+ptcut+' GeV',
        extralegend=['solid?g','dashed?bc'],
        logY=True,
        individual=True)


makeROCs_async(intextfile=filesqcd,
name_list=legend,
probabilities_list=bprob,
truths_list=btruth,
vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*['isC'],
colors_list='auto',
outpdffile=outdir+"btag_qcd_pt400.pdf",
cuts='jet_pt>400',
cmsstyle=True,
firstcomment='QCD, 600 < p_{T} < 800 GeV',
secondcomment='jet p_{T} > 400 GeV',
extralegend=None,
logY=True,
individual=True)

makeROCs_async(intextfile=filesqcd,
name_list=legend,
probabilities_list=cprob,
truths_list=ctruth,
vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*[btruth],
colors_list='auto',
outpdffile=outdir+"ctag_qcd_pt400.pdf",
cuts='jet_pt>400',
cmsstyle=True,
firstcomment='QCD, 600 < p_{T} < 800 GeV',
secondcomment='jet p_{T} > 400 GeV',
extralegend=['solid?udsg','dashed?b'],
logY=True,
individual=True)

makeROCs_async(intextfile=filesqcd,
name_list=legend,
probabilities_list=usdprob,
truths_list='isUD+isS',
vetos_list=len(legend)*['isG']+len(legend)*['isB+isLeptonicB+isLeptonicB_C+isC'],
colors_list='auto',
outpdffile=outdir+"gtag_qcd_pt400.pdf",
cuts='jet_pt>400',
cmsstyle=True,
firstcomment='QCD, 600 < p_{T} < 800 GeV',
secondcomment='jet p_{T} > 400 GeV',
extralegend=['solid?g','dashed?bc'],
logY=False,
individual=True)


# individual plot for ttbar


makeROCs_async(intextfile=[filesttbar[1]],
name_list=['DeepFlavour'],
probabilities_list='prob_isUDS+prob_isC',
truths_list='isUD+isS+isC',
vetos_list=1*['isG']+1*['isB+isLeptonicB+isLeptonicB_C'],
colors_list='auto',
outpdffile=outdir+"lightQuarkJets_pt30.pdf",
cuts='jet_pt>30',
cmsstyle=True,
firstcomment='t#bar{t} events',
secondcomment='jet p_{T} > 30 GeV',
extralegend=['solid?g','dashed?b'],
logY=False,
individual=True)
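A hypothetical invocation of the script above (directory names illustrative; each input directory is expected to hold ttbar/ and qcd_600_800/ subdirectories containing tree_association.txt):

python compareTwoModels.py trainingA trainingB comparison_plots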

9 changes: 8 additions & 1 deletion Train/QGRegression_recurrent.py
@@ -75,7 +75,7 @@
)

from DataCollection import DataCollection
-from TrainData_PT_recur import TrainData_PT_recur
+from TrainData_PT_recur import TrainData_PT_recur, TrainData_recurrent_fullTruth

traind = DataCollection(args.inputfile)
traind.useweights = config_args['useweights']
@@ -147,6 +147,13 @@ def identity(generator):
traind.writeToFile(outputDir+'trainsamples.dc')
testd.writeToFile( outputDir+'valsamples.dc')

#make sure tokens don't expire
from tokenTools import checkTokens, renew_token_process
from thread import start_new_thread

checkTokens()
start_new_thread(renew_token_process,())
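# (renew_token_process is assumed to loop indefinitely, periodically renewing
# Kerberos/AFS tokens so multi-day trainings keep access to network storage)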

print 'training'
try:
model.fit_generator(
7 changes: 7 additions & 0 deletions Train/QGRegression_simple.py
@@ -104,6 +104,13 @@
traind.writeToFile(outputDir+'trainsamples.dc')
testd.writeToFile( outputDir+'valsamples.dc')

#make sure tokens don't expire
from tokenTools import checkTokens, renew_token_process
from thread import start_new_thread

checkTokens()
start_new_thread(renew_token_process,())

print 'training'
try:
model.fit_generator(
55 changes: 31 additions & 24 deletions Train/QG_Class_Regr_image.py
@@ -48,7 +48,7 @@
# configure the in/out/split etc
config_args = { #we might want to move it to an external file
'testrun' : False,
-'nepochs' : 2,
+'nepochs' : 100,
'batchsize' : 2000,
'startlearnrate' : 0.0005,
'useweights' : False,
@@ -93,6 +93,13 @@ def identity(generator):
for i in generator:
yield i

#make sure tokens don't expire
from tokenTools import checkTokens, renew_token_process
from thread import start_new_thread

checkTokens()
start_new_thread(renew_token_process,())

if args.mode == 'class':
model = TrainData_image.classification_model(input_shapes, output_shapes[0])
model.compile(
@@ -163,26 +170,26 @@ def identity(generator):
plt.clf()
#plt.show()

-import json
-def normalize(inmap):
-ret = {}
-for i in inmap:
-ret[i] = [float(j) for j in inmap[i]]
-return ret
-
-with open(outputDir+'history.json', 'w') as history:
-history.write(json.dumps(normalize(callbacks.history.history)))
-
-plt.plot(*callbacks.timer.points)
-plt.title('model loss')
-plt.ylabel('loss')
-plt.xlabel('time [s]')
-plt.savefig(outputDir+'loss_vs_time.pdf')
-plt.clf()
-
-with open(outputDir+'loss_vs_time.json', 'w') as timeloss:
-jmap = {
-'elapsed' : callbacks.timer.points[0],
-'loss' : callbacks.timer.points[1]
-}
-timeloss.write(json.dumps(normalize(jmap)))
+## import json
+## def normalize(inmap):
+## ret = {}
+## for i in inmap:
+## ret[i] = [float(j) for j in inmap[i]]
+## return ret
+##
+## with open(outputDir+'history.json', 'w') as history:
+## history.write(json.dumps(normalize(callbacks.history.history)))
+##
+## plt.plot(*callbacks.timer.points)
+## plt.title('model loss')
+## plt.ylabel('loss')
+## plt.xlabel('time [s]')
+## plt.savefig(outputDir+'loss_vs_time.pdf')
+## plt.clf()
+##
+## with open(outputDir+'loss_vs_time.json', 'w') as timeloss:
+## jmap = {
+## 'elapsed' : callbacks.timer.points[0],
+## 'loss' : callbacks.timer.points[1]
+## }
+## timeloss.write(json.dumps(normalize(jmap)))
56 changes: 56 additions & 0 deletions Train/deepFlavour_noneutral.py
@@ -0,0 +1,56 @@


from training_base import training_base
from Losses import loss_NLL
from modelTools import fixLayersContaining,printLayerInfosAndWeights

#also does all the parsing
train=training_base(testrun=False)

newtraining= not train.modelSet()
#for recovering a training
if newtraining:
from models import model_deepFlavourNoNeutralReference

train.setModel(model_deepFlavourNoNeutralReference,dropoutRate=0.1)

#train.keras_model=fixLayersContaining(train.keras_model, 'regression', invert=False)

train.compileModel(learningrate=0.001,
loss=['categorical_crossentropy',loss_NLL],
metrics=['accuracy'],
loss_weights=[1., 0.000000000001])
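# (the near-vanishing second weight keeps the regression head and loss_NLL in
# the graph while training is effectively driven by the classification term)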


print(train.keras_model.summary())
model,history = train.trainModel(nepochs=1,
batchsize=10000,
stop_patience=300,
lr_factor=0.5,
lr_patience=3,
lr_epsilon=0.0001,
lr_cooldown=6,
lr_minimum=0.0001,
maxqsize=100)


print('fixing input norms...')
train.keras_model=fixLayersContaining(train.keras_model, 'input_batchnorm')
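# (fixLayersContaining presumably sets trainable=False on every layer whose
# name contains 'input_batchnorm', so the input normalisation learned in the
# first epoch stays fixed for the longer training below)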
train.compileModel(learningrate=0.001,
loss=['categorical_crossentropy',loss_NLL],
metrics=['accuracy'],
loss_weights=[1., 0.000000000001])


print(train.keras_model.summary())
#printLayerInfosAndWeights(train.keras_model)

model,history = train.trainModel(nepochs=60,
batchsize=10000,
stop_patience=300,
lr_factor=0.5,
lr_patience=3,
lr_epsilon=0.0001,
lr_cooldown=6,
lr_minimum=0.0001,
maxqsize=100)
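Since training_base handles the command-line parsing (per the comment at the top of the script), a hypothetical invocation would follow the usual DeepJet pattern of an input data collection plus an output directory; both paths here are illustrative:

python deepFlavour_noneutral.py /path/to/train_dataCollection.dc trained_noneutral/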