One Paragraph of project description goes here
These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
| """ Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """ | |
| import numpy as np | |
| import cPickle as pickle | |
| import gym | |
| # hyperparameters | |
| H = 200 # number of hidden layer neurons | |
| batch_size = 10 # every how many episodes to do a param update? | |
| learning_rate = 1e-4 | |
| gamma = 0.99 # discount factor for reward |
| { | |
| "metadata": { | |
| "name": "", | |
| "signature": "sha256:a04c38d9604adb7eb9ca89860dfa1ef72db66037cc2c07c391ef8e67a31f9254" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { |