Latest code for paper results (250k)
Schram committed Oct 14, 2020
1 parent 60bf3f8 commit 92bf045
Showing 5 changed files with 525 additions and 512 deletions.
118 changes: 52 additions & 66 deletions agents/dqn.py
@@ -1,23 +1,25 @@
+import os
+import csv
+import json
 import random
+import logging
 import numpy as np
+import tensorflow as tf
+
 from collections import deque
-from keras.models import Sequential, Model
-from keras.layers import Dense, Dropout, Input, GaussianNoise, BatchNormalization
+from keras.models import Model
+from keras.layers import Dense, Input
 from keras.optimizers import Adam
-import csv, json, os
-
-import tensorflow as tf
 
-import logging
 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger('RL-Logger')
 logger.setLevel(logging.ERROR)
 
 #The Deep Q-Network (DQN)
 
 class DQN:
-    def __init__(self, env,cfg='../cfg/dqn_setup.json'):
+    def __init__(self, env, cfg='../cfg/dqn_setup.json'):
         self.env = env
-        self.memory = deque(maxlen = 2000)
+        self.memory = deque(maxlen=2000)
         self.avg_reward = 0
         self.target_train_counter = 0
@@ -26,56 +28,42 @@ def __init__(self, env,cfg='../cfg/dqn_setup.json'):
         logger.info('Agent action space:{}'.format(self.env.action_space.n))
         logger.info('Agent state space:{}'.format(self.env.observation_space.shape))
 
-        ## Setup GPU cfg
-        #config = tf.ConfigProto()
-        #config.gpu_options.allow_growth = True
-        #sess = tf.Session(config=config)
-        #set_session(sess)
-
-        ## Get hyper-parameters from json cfg file
+        # Get hyper-parameters from json cfg file
         data = []
         with open(cfg) as json_file:
            data = json.load(json_file)
 
         self.search_method = "epsilon"
-        self.gamma = float(data['gamma']) if float(data['gamma']) else 0.95 # discount rate
-
+        self.gamma = float(data['gamma']) if float(data['gamma']) else 0.95 # discount rate
         self.epsilon = float(data['epsilon']) if float(data['epsilon']) else 1.0 # exploration rate
         self.epsilon_min = float(data['epsilon_min']) if float(data['epsilon_min']) else 0.05
         self.epsilon_decay = float(data['epsilon_decay']) if float(data['epsilon_decay']) else 0.995
-        self.learning_rate = float(data['learning_rate']) if float(data['learning_rate']) else 0.001
+        self.learning_rate = float(data['learning_rate']) if float(data['learning_rate']) else 0.001
         self.batch_size = int(data['batch_size']) if int(data['batch_size']) else 32
         self.target_train_interval = int(data['target_train_interval']) if int(data['target_train_interval']) else 1
         self.tau = float(data['tau']) if float(data['tau']) else 1.0
-        self.save_model = ''#data['save_model'] if str(data['save_model']) else './model'
         self.warmup_step = float(data['warmup_step']) if float(data['warmup_step']) else 100
+        self.save_model = ''
 
         self.model = self._build_model()
         self.target_model = self._build_model()
 
-        ## Save infomation ##
-        train_file_name = "dqn_huber_clipnorm=1_clipvalue05_online_accelerator_lr%s_v4.log" % str(self.learning_rate)
+        # Save information
+        train_file_name = "dqn_huber_clipnorm=1_clipvalue05_online_accelerator_lr%s_v4.log" % str(self.learning_rate)
         self.train_file = open(train_file_name, 'w')
-        self.train_writer = csv.writer(self.train_file, delimiter = " ")
+        self.train_writer = csv.writer(self.train_file, delimiter=" ")
 
     def _build_model(self):
-        ## Input: state ##
+        # Input: state
         state_input = Input(self.env.observation_space.shape)
-        ## Make noisy input data ##
-        #state_input = GaussianNoise(0.1)(state_input)
-        ## Noisy layer
         h1 = Dense(128, activation='relu')(state_input)
-        #h1 = GaussianNoise(0.1)(h1)
-        ## Noisy layer
         h2 = Dense(128, activation='relu')(h1)
-        #h2 = GaussianNoise(0.1)(h2)
-        ## Output layer
         h3 = Dense(128, activation='relu')(h2)
-        ## Output: action ##
-        output = Dense(self.env.action_space.n,activation='linear')(h3)
+        # Output: value mapped to action
+        output = Dense(self.env.action_space.n, activation='linear')(h3)
         model = Model(inputs=state_input, outputs=output)
         adam = Adam(lr=self.learning_rate, clipnorm=1.0, clipvalue=0.5)
         model.compile(loss=tf.keras.losses.Huber(), optimizer=adam)
         model.summary()
-        return model
+        return model
 
     def remember(self, state, action, reward, next_state, done):
         self.memory.append((state, action, reward, next_state, done))
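A note on the `float(data['gamma']) if float(data['gamma']) else 0.95` pattern used throughout `__init__`: the fallback triggers on any falsy value, so an intentional `0` or `0.0` in the config is silently replaced, and a missing key raises `KeyError` before the fallback can apply. A minimal alternative sketch using `dict.get` (the `DEFAULTS` table and `load_hyperparams` helper are illustrative, not part of this commit):

```python
import json

# Hypothetical defaults mirroring the fallbacks in DQN.__init__
DEFAULTS = {'gamma': 0.95, 'epsilon': 1.0, 'epsilon_min': 0.05,
            'epsilon_decay': 0.995, 'learning_rate': 0.001, 'tau': 1.0}

def load_hyperparams(cfg_path):
    """Use a default only when the key is absent, so a configured 0 stays 0."""
    with open(cfg_path) as json_file:
        data = json.load(json_file)
    # float() converts the string-encoded numbers ("0.95") used in the cfg file
    return {key: float(data.get(key, default)) for key, default in DEFAULTS.items()}
```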
@@ -86,26 +74,26 @@ def action(self, state):
         if np.random.rand() <= self.epsilon:
             logger.info('Random action')
             action = random.randrange(self.env.action_space.n)
-            ## Update randomness
-            if len(self.memory)>(self.batch_size):
+            # Update randomness
+            if len(self.memory) > self.batch_size:
                 self.epsilon_adj()
         else:
             logger.info('NN action')
-            np_state = np.array(state).reshape(1,len(state))
+            np_state = np.array(state).reshape(1, len(state))
             logger.info('NN action shape{}'.format(np_state.shape))
             act_values = self.target_model.predict(np_state)
             action = np.argmax(act_values[0])
-        policy_type=1
+        policy_type = 1
 
-        return action,policy_type
+        return action, policy_type
 
-    def play(self,state):
+    def play(self, state):
         np_state = np.array(state).reshape(1, len(state))
         act_values = self.target_model.predict(np_state)
         return np.argmax(act_values[0])
 
     def train(self):
-        if len(self.memory)<(self.batch_size):
+        if len(self.memory) < self.batch_size:
             return
 
         logger.info('### TRAINING MODEL ###')
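The hunk above separates the two policies: `action()` is the epsilon-greedy behavior policy used during training (it returns an `(action, policy_type)` pair and decays epsilon on random picks once the buffer holds a full batch), while `play()` is the greedy policy for evaluation. A hypothetical driver loop, assuming a classic gym-style env whose `step()` returns `(next_state, reward, done, info)`; this helper is a sketch, not part of this commit:

```python
def run_episode(agent, env, training=True):
    """Roll out one episode with the DQN agent defined above (sketch only)."""
    state = env.reset()
    total_reward, done = 0.0, False
    while not done:
        if training:
            action, _policy_type = agent.action(state)  # epsilon-greedy: may explore
        else:
            action = agent.play(state)  # greedy w.r.t. the target network
        next_state, reward, done, _ = env.step(action)
        if training:
            agent.remember(state, action, reward, next_state, done)
            agent.train()  # no-op until the replay buffer holds batch_size samples
        state = next_state
        total_reward += reward
    return total_reward
```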
@@ -114,39 +102,37 @@ def train(self):
         batch_states = []
         batch_target = []
         for state, action, reward, next_state, done in minibatch:
-            np_state = np.array(state).reshape(1,len(state))
-            np_next_state = np.array(next_state).reshape(1,len(next_state))
-            expectedQ =0
+            np_state = np.array(state).reshape(1, len(state))
+            np_next_state = np.array(next_state).reshape(1, len(next_state))
+            expectedQ = 0
             if not done:
-                expectedQ = self.gamma*np.amax(self.target_model.predict(np_next_state)[0])
+                expectedQ = self.gamma * np.amax(self.target_model.predict(np_next_state)[0])
             target = reward + expectedQ
-            #print(type(state))
-            #print(type(np_state))
             target_f = self.target_model.predict(np_state)
             target_f[0][action] = target
 
             if batch_states==[]:
-                batch_states=np_state
-                batch_target=target_f
+                batch_states = np_state
+                batch_target = target_f
             else:
-                batch_states=np.append(batch_states,np_state,axis=0)
-                batch_target=np.append(batch_target,target_f,axis=0)
-        history = self.model.fit(batch_states, batch_target, epochs = 1, verbose = 0)
-
+                batch_states = np.append(batch_states, np_state, axis=0)
+                batch_target = np.append(batch_target, target_f, axis=0)
+
+        history = self.model.fit(batch_states, batch_target, epochs=1, verbose=0)
         losses.append(history.history['loss'][0])
         self.train_writer.writerow([np.mean(losses)])
         self.train_file.flush()
 
         logger.info('### TRAINING TARGET MODEL ###')
         self.target_train()
 
         return np.mean(losses)
 
     def target_train(self):
-        model_weights = self.model.get_weights()
-        target_weights =self.target_model.get_weights()
+        model_weights = self.model.get_weights()
+        target_weights = self.target_model.get_weights()
         for i in range(len(target_weights)):
-            target_weights[i] = self.tau*model_weights[i] + (1-self.tau)*target_weights[i]
+            target_weights[i] = self.tau * model_weights[i] + (1 - self.tau) * target_weights[i]
         self.target_model.set_weights(target_weights)
 
     def epsilon_adj(self):
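Two things happen in this hunk: `train()` fits the online model toward the one-step Bellman target `reward + gamma * max_a' Q_target(s', a')`, and `target_train()` applies a Polyak (soft) update, `theta_target <- tau * theta_model + (1 - tau) * theta_target`, so `tau=1.0` is a hard copy while the configured `tau=0.5` blends the two weight sets. A standalone sketch of the same update on plain NumPy arrays, for illustration only:

```python
import numpy as np

def soft_update(model_weights, target_weights, tau):
    """Polyak-average each weight tensor; tau=1.0 reduces to a hard copy."""
    return [tau * mw + (1.0 - tau) * tw
            for mw, tw in zip(model_weights, target_weights)]

# tau=0.5 blends the two weight sets equally: every entry below becomes 0.5.
online = [np.ones((2, 2)), np.ones(3)]
target = [np.zeros((2, 2)), np.zeros(3)]
blended = soft_update(online, target, tau=0.5)
```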
@@ -159,14 +145,14 @@ def load(self, name):
     def save(self, name):
         abspath = os.path.abspath(self.save_model + name)
         path = os.path.dirname(abspath)
-        if not os.path.exists(path):os.makedirs(path)
+        if not os.path.exists(path):
+            os.makedirs(path)
         # Save JSON config to disk
         model_json_name = self.save_model + name + '.json'
         json_config = self.target_model.to_json()
         with open(model_json_name, 'w') as json_file:
             json_file.write(json_config)
         # Save weights to disk
-        self.target_model.save_weights(self.save_model + name+'.weights.h5')
-        self.target_model.save(self.save_model + name+'.modelall.h5')
-        logger.info('### SAVING MODEL '+abspath+'###')
-
+        self.target_model.save_weights(self.save_model + name + '.weights.h5')
+        self.target_model.save(self.save_model + name + '.modelall.h5')
+        logger.info('### SAVING MODEL ' + abspath + '###')
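`save()` writes three artifacts: the architecture as JSON, the weights as `.weights.h5`, and a full-model snapshot as `.modelall.h5`. A hypothetical loader matching that layout (this `restore` helper is an assumption for illustration; the repository's own `load()` is collapsed out of this diff):

```python
from keras.models import model_from_json, load_model

def restore(name, save_model=''):
    """Rebuild the network from the saved JSON architecture plus weights."""
    with open(save_model + name + '.json') as json_file:
        model = model_from_json(json_file.read())
    model.load_weights(save_model + name + '.weights.h5')
    return model

# Or, equivalently, load the single full-model snapshot:
# model = load_model(save_model + name + '.modelall.h5')
```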
6 changes: 3 additions & 3 deletions cfg/dqn_setup.json
@@ -1,10 +1,10 @@
 {
     "search_method": "epsilon",
-    "gamma": "0.95",
+    "gamma": "0.85",
     "epsilon": "1.0",
-    "epsilon_min" : "0.005",
+    "epsilon_min" : "0.0025",
     "epsilon_decay" : "0.9995",
-    "learning_rate" : "0.01",
+    "learning_rate" : "0.005",
     "batch_size" : "32",
     "warmup_step" : "250",
     "tau":"0.5",
