Skorkmaz88 · February 4, 2019 14:52
diff --git a/code.py b/code.py
 #!/usr/bin/python
 # -*- coding: utf-8 -*-

 # Training loop: run episodes on `env` and feed every transition to `agent`.
 # Relies on names defined outside this section: agent, env, batch_size,
 # n_episodes, deque, statistics, plt and clear_output.
 done = False
 highest_episode_reward = 3  # an episode must beat this to count as a new best

 # Rolling window of the most recent (up to 100) finished-episode scores.
 lasthundredscores = deque(maxlen=100)

 # One mean-score sample per 100 episodes; plotted below.
 lasthundredscores_history = []

 # Global step counter. The non-zero start presumably continues an earlier
 # run -- TODO confirm.
 overall_time = 157960

 agent.load('breakout_weights')
 agent.epsilon = 0.72

 for e in range(100000):  # n_episodes
     state = env.reset()

     # state = np.reshape(state, [1, state_size])

     episode_reward = 0
     for time in range(10000):
         overall_time += 1
         if time == 50 and e == 0:
             # Remove the first three entries of agent.memory, once, during
             # the first episode (reason not visible here -- TODO confirm).
             del agent.memory[0]
             del agent.memory[0]
             del agent.memory[0]

         # Replay every 50 steps, skipping the very first opportunity
         # (time == 49).
         if time % 50 == 49 and time > 49:
             agent.replay(batch_size)

         # NOTE(review): `time` never exceeds 9999, so this fires only on the
         # last step of a full-length episode; `overall_time` may have been
         # intended -- confirm before changing.
         if time % 10000 == 9999:
             agent.update_target_model()

         action = agent.act(state)

         (next_state, reward, done, _) = env.step(action)

         episode_reward += reward

         agent.remember(state, action, reward, next_state, done)

         state = next_state

         if done:
             # agent.replay(batch_size)

             # NOTE(review): n_episodes is not defined in this section while
             # the loop bound above is hard-coded -- confirm they agree.
             print('Episode: {}/{}, Time: {}, Overall time: {}, Score: {} e: {:.2}'.format(
                 e,
                 n_episodes,
                 time,
                 overall_time,
                 episode_reward,
                 agent.epsilon,
                 ))
             lasthundredscores.append(episode_reward)

             # Only a genuinely better score may raise the recorded best.
             # Previously the periodic save below also overwrote it, which
             # could lower the threshold after a weak 100th episode.
             is_new_best = episode_reward > highest_episode_reward
             if is_new_best:
                 highest_episode_reward = episode_reward

             # Checkpoint on a new best and on every 100th episode.
             if is_new_best or e % 100 == 99:
                 agent.save('breakout_weights' + str(episode_reward))
                 print('weights saved')

             episode_reward = 0
             break

     if e % 100 == 99:
         # Step epsilon down by 0.005 every 100 episodes while it is
         # above 0.03.
         if agent.epsilon > 0.03:
             agent.epsilon -= 0.005
         clear_output()

         # The window stays empty until some episode has ended with `done`;
         # statistics.mean() raises StatisticsError on empty data.
         if lasthundredscores:
             mean_score = statistics.mean(lasthundredscores)
             lasthundredscores_history.append(mean_score)
             print('Mean scores of 100 episodes' + str(mean_score))

         plt.plot(lasthundredscores_history)
         plt.ylabel('Mean scores of 100 episodes')
         plt.show()
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	# Training loop: run episodes on `env` and feed every transition to `agent`.
	# Relies on names defined outside this section: agent, env, batch_size,
	# n_episodes, deque, statistics, plt and clear_output.
	# Block nesting was lost in this copy and is reconstructed here.
	done = False
	highest_episode_reward = 3  # an episode must beat this to count as a new best

	# Rolling window of the most recent (up to 100) finished-episode scores.
	lasthundredscores = deque(maxlen=100)

	# One mean-score sample per 100 episodes; plotted below.
	lasthundredscores_history = []

	# Global step counter. The non-zero start presumably continues an earlier
	# run -- TODO confirm.
	overall_time = 157960

	agent.load('breakout_weights')
	agent.epsilon = 0.72

	for e in range(100000):  # n_episodes
	    state = env.reset()

	    # state = np.reshape(state, [1, state_size])

	    episode_reward = 0
	    for time in range(10000):
	        overall_time += 1
	        if time == 50 and e == 0:
	            # Remove the first three entries of agent.memory, once, during
	            # the first episode (reason not visible here -- TODO confirm).
	            del agent.memory[0]
	            del agent.memory[0]
	            del agent.memory[0]

	        # Replay every 50 steps, skipping the very first opportunity
	        # (time == 49).
	        if time % 50 == 49 and time > 49:
	            agent.replay(batch_size)

	        # NOTE(review): `time` never exceeds 9999, so this fires only on
	        # the last step of a full-length episode; `overall_time` may have
	        # been intended -- confirm before changing.
	        if time % 10000 == 9999:
	            agent.update_target_model()

	        action = agent.act(state)

	        (next_state, reward, done, _) = env.step(action)

	        episode_reward += reward

	        agent.remember(state, action, reward, next_state, done)

	        state = next_state

	        if done:
	            # agent.replay(batch_size)

	            # NOTE(review): n_episodes is not defined in this section
	            # while the loop bound above is hard-coded -- confirm they
	            # agree.
	            print('Episode: {}/{}, Time: {}, Overall time: {}, Score: {} e: {:.2}'.format(
	                e,
	                n_episodes,
	                time,
	                overall_time,
	                episode_reward,
	                agent.epsilon,
	                ))
	            lasthundredscores.append(episode_reward)

	            # Only a genuinely better score may raise the recorded best.
	            # Previously the periodic save below also overwrote it, which
	            # could lower the threshold after a weak 100th episode.
	            is_new_best = episode_reward > highest_episode_reward
	            if is_new_best:
	                highest_episode_reward = episode_reward

	            # Checkpoint on a new best and on every 100th episode.
	            if is_new_best or e % 100 == 99:
	                agent.save('breakout_weights' + str(episode_reward))
	                print('weights saved')

	            episode_reward = 0
	            break

	    if e % 100 == 99:
	        # Step epsilon down by 0.005 every 100 episodes while it is
	        # above 0.03.
	        if agent.epsilon > 0.03:
	            agent.epsilon -= 0.005
	        clear_output()

	        # The window stays empty until some episode has ended with `done`;
	        # statistics.mean() raises StatisticsError on empty data.
	        if lasthundredscores:
	            mean_score = statistics.mean(lasthundredscores)
	            lasthundredscores_history.append(mean_score)
	            print('Mean scores of 100 episodes' + str(mean_score))

	        plt.plot(lasthundredscores_history)
	        plt.ylabel('Mean scores of 100 episodes')
	        plt.show()
No results found