Created
November 23, 2016 18:32
-
-
Save kislayabhi/6a210cef63c5f9ad07e75a46cc978cd0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import gym\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "[2016-11-23 12:27:08,888] Making new env: FrozenLake-v0\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "env = gym.make('FrozenLake-v0')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Initialize table with all zeros\n", | |
| "Q = np.zeros([env.observation_space.n,env.action_space.n])\n", | |
| "# Set learning parameters\n", | |
| "lr = .85\n", | |
| "y = .99\n", | |
| "num_episodes = 12000\n", | |
| "#create lists to contain total rewards.\n", | |
| "rList = []\n", | |
| "for i in range(num_episodes):\n", | |
| " #Reset environment and get first new observation\n", | |
| " s = env.reset()\n", | |
| " rAll = 0\n", | |
| " d = False\n", | |
| " j = 0\n", | |
| " #The Q-Table learning algorithm\n", | |
| " while j < 99:\n", | |
| " j+=1\n", | |
| " #Choose an action by greedily (with noise) picking from Q table\n", | |
| " a = np.argmax(Q[s,:] + np.random.randn(1,env.action_space.n)*(1./(i+1)))\n", | |
| " #Get new state and reward from environment\n", | |
| " s1 ,reward, done, info = env.step(a)\n", | |
| " #Update Q-Table with new knowledge using Bellman equation\n", | |
| " Q[s,a] = Q[s,a] + lr*(reward + y*np.max(Q[s1,:]) - Q[s,a])\n", | |
| " rAll += reward\n", | |
| " s = s1\n", | |
| " if done == True:\n", | |
| " break\n", | |
| " rList.append(rAll)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[ 3.77121841e-03 2.99653123e-01 4.74620201e-03 7.57315739e-03]\n", | |
| " [ 1.76098937e-05 1.20473444e-03 1.22404041e-03 1.95472804e-01]\n", | |
| " [ 4.12639807e-03 4.33726766e-03 3.86559420e-03 1.66991351e-01]\n", | |
| " [ 9.03864267e-04 9.81592238e-04 1.03553487e-03 1.46342204e-01]\n", | |
| " [ 4.03149683e-01 1.35624784e-03 2.38765578e-03 3.01635443e-04]\n", | |
| " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n", | |
| " [ 2.44445477e-01 1.21101912e-06 3.29131333e-05 8.10563896e-06]\n", | |
| " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n", | |
| " [ 1.34581402e-03 1.02979648e-04 2.07313793e-03 5.46227657e-01]\n", | |
| " [ 2.15177190e-06 7.72984134e-01 3.95011537e-04 7.91705032e-04]\n", | |
| " [ 4.28018632e-01 2.80599674e-04 8.32426047e-05 1.04626695e-04]\n", | |
| " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n", | |
| " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n", | |
| " [ 0.00000000e+00 0.00000000e+00 8.58116972e-01 0.00000000e+00]\n", | |
| " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 9.82108949e-01]\n", | |
| " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print Q" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Score over time: 0.401833333333\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print \"Score over time: \" + str(sum(rList)/num_episodes)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[-0.63564746, 0.70953475, -2.01478143, -0.96223985]])" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Scratch check: one draw of the 1 x n_actions random noise that the training loop adds to Q[s,:]\n", | |
| "np.random.randn(1, env.action_space.n)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "raw", | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "source": [ | |
| "env.reset()\n", | |
| "\n", | |
| "for i_episode in range(20):\n", | |
| " observation = env.reset()\n", | |
| " for t in range(1000):\n", | |
| " env.render()\n", | |
| " print(observation)\n", | |
| " action = env.action_space.sample()\n", | |
| " #print(action)\n", | |
| " #raw_input(\"Press Enter to continue...\")\n", | |
| " observation, reward, done, info = env.step(action)\n", | |
| " if done:\n", | |
| " print(\"Episode finished after {} timesteps\".format(t+1))\n", | |
| " break" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.12" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment