bring up to date #3

Open · wants to merge 48 commits into base: master

Commits (48)
6ff5783
make fair comparison between recurrent and image approach
Jul 3, 2017
dac58d4
skip first epochs for loss
jkiesele Aug 8, 2017
ec8e2a5
introduced new loss: logcosh - very similar to Huber loss. Added poss…
jkiesele Aug 10, 2017
30f7724
registered loss in global list
jkiesele Aug 10, 2017
7abb390
added logcosh loss for single number
jkiesele Aug 10, 2017
8f98737
some plotting improvements and bugfixes. basis for random selection i…
jkiesele Aug 14, 2017
564fd63
comparison script
jkiesele Aug 25, 2017
0dc2a87
small scripts and major performance and stability update for core fra…
jkiesele Sep 5, 2017
3830db3
changes needed for incremental studies
Sep 6, 2017
792c697
merge conflicts
Sep 6, 2017
576e492
significant core improvements
jkiesele Sep 8, 2017
a01cdc6
core developments
jkiesele Sep 9, 2017
9474a7c
min preread of 2 files
jkiesele Sep 9, 2017
4830caa
MAJOR: Finally resuming a training works. There is a problem in the o…
jkiesele Sep 10, 2017
5972627
updated HDD tuning
Sep 11, 2017
1bfd4d7
bugfix
Sep 11, 2017
841708d
some fixes in reading and better error messages
jkiesele Sep 14, 2017
d7f3c4a
deepCSV example
jkiesele Sep 14, 2017
fea9009
some small performance improvements, better pick up of training where…
jkiesele Sep 21, 2017
051dc94
small adjustments
jkiesele Sep 21, 2017
94e938f
significant speed-up of conversion
jkiesele Sep 21, 2017
d9ad64d
more performance increases
jkiesele Sep 22, 2017
0ca3886
small adjustments, reintroduce random shuffle in generator
jkiesele Sep 22, 2017
de845a7
small bugfix
jkiesele Sep 26, 2017
7e7f92b
some locking cleanup
jkiesele Sep 26, 2017
0bb8efe
some performance improvements... still some to do
jkiesele Oct 6, 2017
209ff76
some fixes
jkiesele Oct 8, 2017
7880918
new file IO system. Significantly improved performance
jkiesele Oct 8, 2017
c93b0c2
speed-up of conversion.
jkiesele Oct 9, 2017
56cc348
small bugfix
jkiesele Oct 9, 2017
0206f84
updates
jkiesele Nov 17, 2017
74fb886
Merge branch 'master' of https://github.com/mstoye/DeepJet
Dec 4, 2017
c7f4add
Merge remote-tracking branch 'loc/incremental_studies' into increment…
Dec 4, 2017
a5252dc
more usable roc output
jkiesele Dec 4, 2017
0676b06
snapshot
jkiesele Dec 6, 2017
734a237
automatic numbering of output lists
jkiesele Dec 6, 2017
7905140
Merge pull request #25 from mverzett/master
Dec 18, 2017
f525549
added a no-puppi DeepFlavour version
Dec 19, 2017
0fa26cc
Merge branch 'master' of https://github.com/mstoye/DeepJet into incre…
Dec 19, 2017
9d92fab
larger batch size
Jan 23, 2018
b84daaa
update deepCSV to newer workflow
Jan 23, 2018
2db12d2
Domain adaptation: first sets of commits
Jan 23, 2018
73af1df
yet another update
Jan 23, 2018
5a407d3
bugfixes and modification for the DomainAdaptation scripts
Jan 30, 2018
49239dc
domain adaptation losses and training scripts
Jan 30, 2018
360cd20
Merge pull request #26 from mverzett/master
jkiesele Jan 30, 2018
f318add
gradient reversal layer
Jan 31, 2018
cb1aa95
Merge pull request #27 from mverzett/master
Jan 31, 2018
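The final commit (f318add, "gradient reversal layer") is the key ingredient of the domain-adaptation work merged in this PR. As a rough sketch of the technique, not necessarily the PR's exact implementation: a common Keras construction (assuming a TensorFlow backend; the class name GradientReversal and the hp_lambda parameter are illustrative) uses tf.stop_gradient so that the layer is the identity in the forward pass but multiplies the gradient by -hp_lambda in the backward pass, as in Ganin & Lempitsky's domain-adversarial training:

import tensorflow as tf
from keras.layers import Layer

class GradientReversal(Layer):
    # identity in the forward pass; gradient scaled by -hp_lambda backward
    def __init__(self, hp_lambda=1.0, **kwargs):
        super(GradientReversal, self).__init__(**kwargs)
        self.hp_lambda = hp_lambda

    def call(self, x, mask=None):
        # forward value: (1 + l)*x - l*x = x
        # gradient:      0 - l = -l, because stop_gradient blocks
        # backpropagation through its argument
        return tf.stop_gradient((1. + self.hp_lambda) * x) - self.hp_lambda * x

    def get_config(self):
        config = super(GradientReversal, self).get_config()
        config['hp_lambda'] = self.hp_lambda
        return config

Placed between the shared feature layers and the domain classifier, the reversed gradient pushes the features toward domain invariance while the domain classifier itself is trained normally.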
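For the Aug 10, 2017 commits that introduce the log-cosh loss: log(cosh(x)) grows like x^2/2 for small residuals and like |x| - log(2) for large ones, which is why the commit message calls it "very similar to Huber loss" (with the bonus of being smooth everywhere). A minimal Keras-style sketch follows; the function name loss_logcosh, the softplus identity, and the global_loss_list registration are illustrative assumptions, not the repository's exact code:

import numpy as np
import keras.backend as K

def loss_logcosh(y_true, y_pred):
    x = y_pred - y_true
    # numerically stable identity: log(cosh(x)) = x + softplus(-2x) - log(2)
    return K.mean(x + K.softplus(-2. * x) - np.log(2.), axis=-1)

# the "registered loss in global list" commit suggests custom losses are
# resolved by name when models are reloaded; the dict name is an assumption
global_loss_list = {'loss_logcosh': loss_logcosh}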
4 changes: 4 additions & 0 deletions .gitignore
@@ -12,3 +12,7 @@ __pycache__
*.png
*.eps
*.bak
*.h5
*.csv
*.root
*.json
Binary file removed Analysis/test.pdf
Binary file not shown.
196 changes: 196 additions & 0 deletions Train/Plotting/Jan/compareTwoModels.py
@@ -0,0 +1,196 @@


from argparse import ArgumentParser

parser = ArgumentParser('make a set of ROC curves, comparing two trainings')
parser.add_argument('inputDirA')
parser.add_argument('inputDirB')
parser.add_argument('outputDir')
args = parser.parse_args()


outdir=args.outputDir+'/'


from testing import makeROCs_async, makePlots_async, testDescriptor
#from keras.models import load_model
from DataCollection import DataCollection

import os
os.system('mkdir -p '+outdir)

trainings=[args.inputDirA,
args.inputDirB]


trainings.extend(trainings)
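# the duplication above makes each training appear twice in the file lists,
# matching the doubled veto sets handed to makeROCs_async further down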

filesttbar=[]
for t in trainings:
    filesttbar.append(t+'/ttbar/tree_association.txt')

filesqcd=[]
for t in trainings:
    filesqcd.append(t+'/qcd_600_800/tree_association.txt')


legend=['standard','p_{T} cut']

btruth='isB+isBB+isGBB+isLeptonicB+isLeptonicB_C'
ctruth='isC+isCC+isGCC'
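# (the summed flags are presumably evaluated as ROOT TTree::Draw-style
# expressions: a jet passes a truth definition when the sum is non-zero)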


bprob=[ 'prob_isB+prob_isBB+prob_isLeptB',
'prob_isB+prob_isBB+prob_isLeptB',

'prob_isB+prob_isBB+prob_isLeptB',
'prob_isB+prob_isBB+prob_isLeptB',
]

cprob=[ 'prob_isC',
'prob_isC',

'prob_isC',
'prob_isC']

usdprob=['prob_isUDS',
'prob_isUDS',

'prob_isUDS',
'prob_isUDS',]




print('creating ROCs')

#makeROCs_async(intextfile,
# name_list,
# probabilities_list,
# truths_list,
# vetos_list,
# colors_list,
# outpdffile,
# cuts,
# cmsstyle,
# firstcomment,
# secondcomment,
# extralegend=None,
# logY=True)




for ptcut in ['30','150']:

    makeROCs_async(intextfile=filesttbar,
        name_list=legend,
        probabilities_list=bprob,
        truths_list=btruth,
        vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*['isC'],
        colors_list='auto',
        outpdffile=outdir+"btag_pt"+ptcut+".pdf",
        cuts='jet_pt>'+ptcut,
        cmsstyle=True,
        firstcomment='t#bar{t} events',
        secondcomment='jet p_{T} > '+ptcut+' GeV',
        extralegend=None,
        logY=True,
        individual=True
        )

    makeROCs_async(intextfile=filesttbar,
        name_list=legend,
        probabilities_list=cprob,
        truths_list=ctruth,
        vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*[btruth],
        colors_list='auto',
        outpdffile=outdir+"ctag_pt"+ptcut+".pdf",
        cuts='jet_pt>'+ptcut,
        cmsstyle=True,
        firstcomment='t#bar{t} events',
        secondcomment='jet p_{T} > '+ptcut+' GeV',
        extralegend=['solid?udsg','dashed?b'],
        logY=True,
        individual=True)

    makeROCs_async(intextfile=filesttbar,
        name_list=legend,
        probabilities_list=usdprob,
        truths_list='isUD+isS',
        vetos_list=len(legend)*['isG']+len(legend)*['isB+isLeptonicB+isLeptonicB_C+isC'],
        colors_list='auto',
        outpdffile=outdir+"gtag_pt"+ptcut+".pdf",
        cuts='jet_pt>'+ptcut,
        cmsstyle=True,
        firstcomment='t#bar{t} events',
        secondcomment='jet p_{T} > '+ptcut+' GeV',
        extralegend=['solid?g','dashed?bc'],
        logY=True,
        individual=True)


makeROCs_async(intextfile=filesqcd,
name_list=legend,
probabilities_list=bprob,
truths_list=btruth,
vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*['isC'],
colors_list='auto',
outpdffile=outdir+"btag_qcd_pt400.pdf",
cuts='jet_pt>400',
cmsstyle=True,
firstcomment='QCD, 600 < p_{T} < 800 GeV',
secondcomment='jet p_{T} > 400 GeV',
extralegend=None,
logY=True,
individual=True)

makeROCs_async(intextfile=filesqcd,
name_list=legend,
probabilities_list=cprob,
truths_list=ctruth,
vetos_list=len(legend)*['isUD+isS+isG']+len(legend)*[btruth],
colors_list='auto',
outpdffile=outdir+"ctag_qcd_pt400.pdf",
cuts='jet_pt>400',
cmsstyle=True,
firstcomment='QCD, 600 < p_{T} < 800 GeV',
secondcomment='jet p_{T} > 400 GeV',
extralegend=['solid?udsg','dashed?b'],
logY=True,
individual=True)

makeROCs_async(intextfile=filesqcd,
name_list=legend,
probabilities_list=usdprob,
truths_list='isUD+isS',
vetos_list=len(legend)*['isG']+len(legend)*['isB+isLeptonicB+isLeptonicB_C+isC'],
colors_list='auto',
outpdffile=outdir+"gtag_qcd_pt400.pdf",
cuts='jet_pt>400',
cmsstyle=True,
firstcomment='QCD, 600 < p_{T} < 800 GeV',
secondcomment='jet p_{T} > 400 GeV',
extralegend=['solid?g','dashed?bc'],
logY=False,
individual=True)


# individual plot for ttbar


makeROCs_async(intextfile=[filesttbar[1]],
name_list=['DeepFlavour'],
probabilities_list='prob_isUDS+prob_isC',
truths_list='isUD+isS+isC',
vetos_list=1*['isG']+1*['isB+isLeptonicB+isLeptonicB_C'],
colors_list='auto',
outpdffile=outdir+"lightQuarkJets_pt30.pdf",
cuts='jet_pt>30',
cmsstyle=True,
firstcomment='t#bar{t} events',
secondcomment='jet p_{T} > 30 GeV',
extralegend=['solid?g','dashed?b'],
logY=False,
individual=True)
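A hypothetical invocation of the script above (directory names illustrative; each input directory is expected to hold ttbar/ and qcd_600_800/ subdirectories containing tree_association.txt):

python compareTwoModels.py trainingA trainingB comparison_plots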

9 changes: 8 additions & 1 deletion Train/QGRegression_recurrent.py
@@ -75,7 +75,7 @@
)

from DataCollection import DataCollection
-from TrainData_PT_recur import TrainData_PT_recur
+from TrainData_PT_recur import TrainData_PT_recur, TrainData_recurrent_fullTruth

traind = DataCollection(args.inputfile)
traind.useweights = config_args['useweights']
@@ -147,6 +147,13 @@ def identity(generator):
traind.writeToFile(outputDir+'trainsamples.dc')
testd.writeToFile( outputDir+'valsamples.dc')

#make sure tokens don't expire
from tokenTools import checkTokens, renew_token_process
from thread import start_new_thread

checkTokens()
start_new_thread(renew_token_process,())
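# (renew_token_process is assumed to loop indefinitely, periodically renewing
# Kerberos/AFS tokens so multi-day trainings keep access to network storage)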

print 'training'
try:
model.fit_generator(
7 changes: 7 additions & 0 deletions Train/QGRegression_simple.py
@@ -104,6 +104,13 @@
traind.writeToFile(outputDir+'trainsamples.dc')
testd.writeToFile( outputDir+'valsamples.dc')

#make sure tokens don't expire
from tokenTools import checkTokens, renew_token_process
from thread import start_new_thread

checkTokens()
start_new_thread(renew_token_process,())

print 'training'
try:
model.fit_generator(
55 changes: 31 additions & 24 deletions Train/QG_Class_Regr_image.py
@@ -48,7 +48,7 @@
# configure the in/out/split etc
config_args = { #we might want to move it to an external file
'testrun' : False,
-'nepochs' : 2,
+'nepochs' : 100,
'batchsize' : 2000,
'startlearnrate' : 0.0005,
'useweights' : False,
@@ -93,6 +93,13 @@ def identity(generator):
for i in generator:
yield i

#make sure tokens don't expire
from tokenTools import checkTokens, renew_token_process
from thread import start_new_thread

checkTokens()
start_new_thread(renew_token_process,())

if args.mode == 'class':
model = TrainData_image.classification_model(input_shapes, output_shapes[0])
model.compile(
@@ -163,26 +170,26 @@ def identity(generator):
plt.clf()
#plt.show()

-import json
-def normalize(inmap):
-ret = {}
-for i in inmap:
-ret[i] = [float(j) for j in inmap[i]]
-return ret
-
-with open(outputDir+'history.json', 'w') as history:
-history.write(json.dumps(normalize(callbacks.history.history)))
-
-plt.plot(*callbacks.timer.points)
-plt.title('model loss')
-plt.ylabel('loss')
-plt.xlabel('time [s]')
-plt.savefig(outputDir+'loss_vs_time.pdf')
-plt.clf()
-
-with open(outputDir+'loss_vs_time.json', 'w') as timeloss:
-jmap = {
-'elapsed' : callbacks.timer.points[0],
-'loss' : callbacks.timer.points[1]
-}
-timeloss.write(json.dumps(normalize(jmap)))
+## import json
+## def normalize(inmap):
+## ret = {}
+## for i in inmap:
+## ret[i] = [float(j) for j in inmap[i]]
+## return ret
+##
+## with open(outputDir+'history.json', 'w') as history:
+## history.write(json.dumps(normalize(callbacks.history.history)))
+##
+## plt.plot(*callbacks.timer.points)
+## plt.title('model loss')
+## plt.ylabel('loss')
+## plt.xlabel('time [s]')
+## plt.savefig(outputDir+'loss_vs_time.pdf')
+## plt.clf()
+##
+## with open(outputDir+'loss_vs_time.json', 'w') as timeloss:
+## jmap = {
+## 'elapsed' : callbacks.timer.points[0],
+## 'loss' : callbacks.timer.points[1]
+## }
+## timeloss.write(json.dumps(normalize(jmap)))
56 changes: 56 additions & 0 deletions Train/deepFlavour_noneutral.py
@@ -0,0 +1,56 @@


from training_base import training_base
from Losses import loss_NLL
from modelTools import fixLayersContaining,printLayerInfosAndWeights

#also does all the parsing
train=training_base(testrun=False)

newtraining= not train.modelSet()
#for recovering a training
if newtraining:
from models import model_deepFlavourNoNeutralReference

train.setModel(model_deepFlavourNoNeutralReference,dropoutRate=0.1)

#train.keras_model=fixLayersContaining(train.keras_model, 'regression', invert=False)

train.compileModel(learningrate=0.001,
loss=['categorical_crossentropy',loss_NLL],
metrics=['accuracy'],
loss_weights=[1., 0.000000000001])
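# (the near-vanishing second weight keeps the regression head and loss_NLL in
# the graph while training is effectively driven by the classification term)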


print(train.keras_model.summary())
model,history = train.trainModel(nepochs=1,
batchsize=10000,
stop_patience=300,
lr_factor=0.5,
lr_patience=3,
lr_epsilon=0.0001,
lr_cooldown=6,
lr_minimum=0.0001,
maxqsize=100)


print('fixing input norms...')
train.keras_model=fixLayersContaining(train.keras_model, 'input_batchnorm')
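# (fixLayersContaining presumably sets trainable=False on every layer whose
# name contains 'input_batchnorm', so the input normalisation learned in the
# first epoch stays fixed for the longer training below)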
train.compileModel(learningrate=0.001,
loss=['categorical_crossentropy',loss_NLL],
metrics=['accuracy'],
loss_weights=[1., 0.000000000001])


print(train.keras_model.summary())
#printLayerInfosAndWeights(train.keras_model)

model,history = train.trainModel(nepochs=60,
batchsize=10000,
stop_patience=300,
lr_factor=0.5,
lr_patience=3,
lr_epsilon=0.0001,
lr_cooldown=6,
lr_minimum=0.0001,
maxqsize=100)
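Since training_base handles the command-line parsing (per the comment at the top of the script), a hypothetical invocation would follow the usual DeepJet pattern of an input data collection plus an output directory; both paths here are illustrative:

python deepFlavour_noneutral.py /path/to/train_dataCollection.dc trained_noneutral/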