{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import gym\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2016-11-23 12:27:08,888] Making new env: FrozenLake-v0\n"
]
}
],
"source": [
"env = gym.make('FrozenLake-v0')"
]
},
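{
"cell_type": "markdown",
"metadata": {},
"source": [
"A brief note on the environment (background, not part of the original gist): FrozenLake-v0 is a 4x4 gridworld with 16 discrete states and 4 discrete actions (left, down, right, up). The agent starts at the top-left tile and must reach the goal at the bottom-right; stepping into a hole ends the episode with reward 0, while reaching the goal yields reward 1. The ice is slippery, so an action only moves the agent in the intended direction some of the time, which is why the learned Q-values below stay well under 1."
]
},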
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Initialize table with all zeros\n",
"Q = np.zeros([env.observation_space.n,env.action_space.n])\n",
"# Set learning parameters\n",
"lr = .85\n",
"y = .99\n",
"num_episodes = 12000\n",
"#create lists to contain total rewards.\n",
"rList = []\n",
"for i in range(num_episodes):\n",
" #Reset environment and get first new observation\n",
" s = env.reset()\n",
" rAll = 0\n",
" d = False\n",
" j = 0\n",
" #The Q-Table learning algorithm\n",
" while j < 99:\n",
" j+=1\n",
" #Choose an action by greedily (with noise) picking from Q table\n",
" a = np.argmax(Q[s,:] + np.random.randn(1,env.action_space.n)*(1./(i+1)))\n",
" #Get new state and reward from environment\n",
" s1 ,reward, done, info = env.step(a)\n",
" #Update Q-Table with new knowledge using Bellman equation\n",
" Q[s,a] = Q[s,a] + lr*(reward + y*np.max(Q[s1,:]) - Q[s,a])\n",
" rAll += reward\n",
" s = s1\n",
" if done == True:\n",
" break\n",
" rList.append(rAll)"
]
},
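{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the in-place update in the loop above is the standard tabular Q-learning rule, with learning rate $\\alpha$ = `lr` and discount factor $\\gamma$ = `y`:\n",
"\n",
"$$Q(s, a) \\leftarrow Q(s, a) + \\alpha \\left[ r + \\gamma \\max_{a'} Q(s', a') - Q(s, a) \\right]$$\n",
"\n",
"The noise term added to `Q[s, :]` before the argmax decays as $1/(i+1)$, so exploration dominates in early episodes and action selection becomes nearly greedy later on."
]
},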
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 3.77121841e-03 2.99653123e-01 4.74620201e-03 7.57315739e-03]\n",
" [ 1.76098937e-05 1.20473444e-03 1.22404041e-03 1.95472804e-01]\n",
" [ 4.12639807e-03 4.33726766e-03 3.86559420e-03 1.66991351e-01]\n",
" [ 9.03864267e-04 9.81592238e-04 1.03553487e-03 1.46342204e-01]\n",
" [ 4.03149683e-01 1.35624784e-03 2.38765578e-03 3.01635443e-04]\n",
" [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [ 2.44445477e-01 1.21101912e-06 3.29131333e-05 8.10563896e-06]\n",
" [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [ 1.34581402e-03 1.02979648e-04 2.07313793e-03 5.46227657e-01]\n",
" [ 2.15177190e-06 7.72984134e-01 3.95011537e-04 7.91705032e-04]\n",
" [ 4.28018632e-01 2.80599674e-04 8.32426047e-05 1.04626695e-04]\n",
" [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [ 0.00000000e+00 0.00000000e+00 8.58116972e-01 0.00000000e+00]\n",
" [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 9.82108949e-01]\n",
" [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n"
]
}
],
"source": [
"print Q"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Score over time: 0.401833333333\n"
]
}
],
"source": [
"print \"Score over time: \" + str(sum(rList)/num_episodes)"
]
},
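{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check on the learned table is to run the purely greedy policy it implies, with no exploration noise. The sketch below is not part of the original gist; `policy`, `wins`, and `eval_episodes` are names introduced here for illustration, and it assumes the same 4-tuple `env.step` API used above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Sketch: evaluate the greedy policy implied by Q (no exploration noise)\n",
"policy = np.argmax(Q, axis=1)\n",
"wins = 0.0\n",
"eval_episodes = 1000\n",
"for _ in range(eval_episodes):\n",
"    s = env.reset()\n",
"    for _ in range(99):\n",
"        s, reward, done, info = env.step(policy[s])\n",
"        if done:\n",
"            wins += reward\n",
"            break\n",
"print(\"Greedy success rate: \" + str(wins / eval_episodes))"
]
},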
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[-0.63564746, 0.70953475, -2.01478143, -0.96223985]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.random.randn(1, env.action_space.n)"
]
},
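{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell above samples the exploration noise at full scale; during training it is multiplied by $1/(i+1)$, where $i$ is the episode index. A tiny illustration (not in the original gist) of how quickly that scale anneals:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Sketch: the noise scale 1/(i+1) shrinks rapidly over training\n",
"for i in [0, 9, 99, 999, 11999]:\n",
"    print(\"episode %d: noise scale %.5f\" % (i, 1. / (i + 1)))"
]
},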
{
"cell_type": "raw",
"metadata": {
"collapsed": false
},
"source": [
"env.reset()\n",
"\n",
"for i_episode in range(20):\n",
" observation = env.reset()\n",
" for t in range(1000):\n",
" env.render()\n",
" print(observation)\n",
" action = env.action_space.sample()\n",
" #print(action)\n",
" #raw_input(\"Press Enter to continue...\")\n",
" observation, reward, done, info = env.step(action)\n",
" if done:\n",
" print(\"Episode finished after {} timesteps\".format(t+1))\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}