This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def train(mnist, max_layers):
    """Set up the REINFORCE controller that proposes architectures.

    Creates a TensorFlow session, a non-trainable global step counter, a
    staircase exponential learning-rate schedule, an RMSProp optimizer and
    the ``Reinforce`` controller wired to ``policy_network``.

    Args:
        mnist: dataset handle; not used by the setup code in this block.
        max_layers: maximum number of layers, forwarded to ``Reinforce``.
    """
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    # NOTE(review): this value is never used; the schedule below starts from
    # the hard-coded 0.99 instead. Kept unchanged so the effective learning
    # rate stays the same -- confirm which starting rate was intended.
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(0.99, global_step,
                                               500, 0.96, staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    # Use the function's own argument rather than a module-level ``args``
    # object, so the caller's ``max_layers`` is actually honoured.
    reinforce = Reinforce(sess, optimizer, policy_network, max_layers, global_step)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train the candidate model in a session of its own: initialise all global
# variables, then draw one mini-batch per step and assemble the feed dict.
with tf.Session() as train_sess:
    train_sess.run(tf.global_variables_initializer())
    for step in range(self.max_step_per_action):
        # ``bathc_size`` (sic) is the attribute name this class uses.
        batch_x, batch_y = self.mnist.train.next_batch(self.bathc_size)
        feed = {
            model.X: batch_x,
            model.Y: batch_y,
            model.dropout_keep_prob: self.dropout_rate,
            model.cnn_dropout_rates: cnn_drop_rate,
        }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_reward(self, action, step, pre_acc):
    """Split the flat action into per-layer groups of four values.

    ``action[0][0]`` is cut into consecutive slices of length 4, and the
    fourth value of every slice is collected as that layer's dropout rate.
    """
    flat = action[0][0]
    action = [flat[start:start + 4] for start in range(0, len(flat), 4)]
    cnn_drop_rate = [layer[3] for layer in action]
| Then we formed a batch of hyperparameters for every layer in "action" and created cnn_drop_rate – a list of dropout rates for every layer. | |
| Now let's create a new CNN with the new architecture: | |
# Build the candidate CNN inside a fresh graph and a per-step container,
# then define its mean loss and an Adam optimizer.
with tf.Graph().as_default() as g, g.container('experiment'+str(step)):
    model = CNN(self.num_input, self.num_classes, action)
    loss_op = tf.reduce_mean(model.loss)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate,
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NetManager:
    """Settings holder for training and scoring candidate networks.

    Args:
        num_input: input size handed to the candidate model.
        num_classes: number of output classes handed to the candidate model.
        learning_rate: learning rate for the candidate model's optimizer.
        mnist: dataset object; mini-batches are drawn from ``mnist.train``.
        max_step_per_action: number of training steps run per candidate.
        bathc_size: mini-batch size (parameter name spelled as in the
            existing interface, so keyword callers keep working).
        dropout_rate: value fed to the model's ``dropout_keep_prob`` input.
    """

    def __init__(self, num_input, num_classes, learning_rate, mnist,
                 max_step_per_action=5500,
                 bathc_size=100,
                 dropout_rate=0.85):
        self.num_input = num_input
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.mnist = mnist

        # The training loop reads these from ``self``; store them so the
        # keyword arguments are not silently dropped.
        self.max_step_per_action = max_step_per_action
        self.bathc_size = bathc_size
        self.dropout_rate = dropout_rate
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def store_rollout(self, state, reward):
    """Record one (state, reward) pair in the rollout buffers.

    Args:
        state: indexable whose first element is the state to store.
        reward: reward obtained for that state.

    Raises:
        IndexError: if ``state`` is empty; neither buffer is modified then.
    """
    # Resolve the state before touching either buffer: train_step slices
    # both buffers with the same count, so they must stay the same length
    # even when ``state[0]`` fails.
    first_state = state[0]
    self.reward_buffer.append(reward)
    self.state_buffer.append(first_state)
def train_step(self, steps_count):
    """Run one optimisation step on the most recent rollouts.

    Args:
        steps_count: how many of the latest buffered (state, reward) pairs
            to train on.

    Returns:
        The loss value fetched from the session for this step.
    """
    # NOTE: ``steps_count == 0`` makes ``[-0:]`` select the whole buffer.
    # Scale the raw states down by ``division_rate`` before feeding them.
    states = np.array(self.state_buffer[-steps_count:]) / self.division_rate
    rewards = self.reward_buffer[-steps_count:]
    _, ls = self.sess.run(
        [self.train_op, self.loss],
        {self.states: states, self.discounted_rewards: rewards},
    )
    # Hand the loss back instead of discarding it, so callers can log it.
    return ls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_action(self, state):
    """Choose the next action with an epsilon-greedy rule.

    With probability ``self.exploration`` a random action is returned;
    otherwise the policy network's prediction for ``state`` is used.

    Args:
        state: value fed to the ``self.states`` placeholder.

    Returns:
        Either an array of shape ``(1, 1, 4 * max_layers)`` holding random
        integers in ``[1, 34]``, or the result of running
        ``self.predicted_action`` in the session.
    """
    # The original returned the prediction unconditionally on its first
    # line, which left the exploration branch below unreachable.
    if random.random() < self.exploration:
        # Draw with replacement: ``random.sample`` raises ValueError once
        # 4 * max_layers exceeds the 34 available values, and it forces
        # every hyperparameter in the action to be distinct.
        explored = random.choices(range(1, 35), k=4 * self.max_layers)
        return np.array([[explored]])
    return self.sess.run(self.predicted_action, {self.states: state})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the graph pieces first, then initialise every variable registered
# in the GLOBAL_VARIABLES collection using this object's session.
self.create_variables()
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
init_op = tf.variables_initializer(var_lists)
self.sess.run(init_op)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_variables(self):
    """Create the state placeholder and the policy network outputs.

    The placeholder accepts float32 rows of width ``4 * max_layers``; the
    policy network is built on top of it inside the ``policy_network``
    variable scope.
    """
    state_width = self.max_layers * 4

    with tf.name_scope("model_inputs"):
        # raw state representation
        self.states = tf.placeholder(
            tf.float32, [None, state_width], name="states"
        )

    with tf.name_scope("predict_actions"):
        # initialize policy network
        with tf.variable_scope("policy_network"):
            self.policy_outputs = self.policy_network(
                self.states, self.max_layers
            )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Reinforce:
    """REINFORCE controller state: session, optimizer, policy and buffers.

    Args:
        sess: TensorFlow session used to run the controller's ops.
        optimizer: optimizer instance for the controller.
        policy_network: callable ``(states, max_layers)`` that builds the
            policy outputs.
        max_layers: maximum number of layers; states and actions are
            ``4 * max_layers`` wide.
        global_step: global step variable shared with the learning-rate
            schedule.
        division_rate: divisor applied to stored states before training.
        reg_param: regularisation coefficient (stored for later use).
        discount_factor: reward discount factor (stored for later use).
        exploration: probability of taking a random action.
    """

    def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
                 division_rate=100.0,
                 reg_param=0.001,
                 discount_factor=0.99,
                 exploration=0.3):
        self.sess = sess
        self.optimizer = optimizer
        self.policy_network = policy_network
        self.division_rate = division_rate

        # Other methods read these from ``self`` (max_layers, exploration),
        # so keep every constructor argument instead of dropping it.
        self.max_layers = max_layers
        self.global_step = global_step
        self.reg_param = reg_param
        self.discount_factor = discount_factor
        self.exploration = exploration

        # Rollout history appended to by store_rollout and sliced by
        # train_step; both lists must exist before either is called.
        self.reward_buffer = []
        self.state_buffer = []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def policy_network(state, max_layers):
    """Build the recurrent policy: a NASCell run over the expanded state.

    The cell has ``4 * max_layers`` units. The state gets a trailing axis
    added before being fed to ``tf.nn.dynamic_rnn``, and a bias variable
    initialised to 0.05 per unit is added to the RNN outputs.
    """
    num_units = 4 * max_layers
    with tf.name_scope("policy_network"):
        nas_cell = tf.contrib.rnn.NASCell(num_units)
        rnn_inputs = tf.expand_dims(state, -1)
        outputs, state = tf.nn.dynamic_rnn(nas_cell, rnn_inputs, dtype=tf.float32)
        bias = tf.Variable([0.05] * num_units)
        outputs = tf.nn.bias_add(outputs, bias)