Skip to content

Commit 91df465

Browse files
Updated notebooks
1 parent d6322e1 commit 91df465

15 files changed

+87
-104
lines changed

Chapter01/06_neural_evolutionary_agent.ipynb

Lines changed: 6 additions & 6 deletions
Large diffs are not rendered by default.

Chapter02/3_temporal_difference_learning.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/4_monte_carlo_prediction_and_control_rl.ipynb

Lines changed: 2 additions & 2 deletions
Large diffs are not rendered by default.

Chapter02/5_sarsa_sarsa_lambda.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/6_q_learning.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/7_policy_gradients.ipynb

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,13 @@
299299
"Episode#:0 ep_reward:-147.0\r",
300300
"Episode#:0 ep_reward:-148.0\r",
301301
"Episode#:0 ep_reward:-149.0\r",
302-
"Episode#:0 ep_reward:-150.0\r",
302+
"Episode#:0 ep_reward:-150.0\r"
303+
]
304+
},
305+
{
306+
"name": "stdout",
307+
"output_type": "stream",
308+
"text": [
303309
"Episode#:0 ep_reward:-151.0\r",
304310
"Episode#:0 ep_reward:-152.0\r",
305311
"Episode#:0 ep_reward:-153.0\r",
@@ -313,13 +319,7 @@
313319
"Episode#:0 ep_reward:-161.0\r",
314320
"Episode#:0 ep_reward:-162.0\r",
315321
"Episode#:0 ep_reward:-163.0\r",
316-
"Episode#:0 ep_reward:-164.0\r"
317-
]
318-
},
319-
{
320-
"name": "stdout",
321-
"output_type": "stream",
322-
"text": [
322+
"Episode#:0 ep_reward:-164.0\r",
323323
"Episode#:0 ep_reward:-165.0\r",
324324
"Episode#:0 ep_reward:-166.0\r",
325325
"Episode#:0 ep_reward:-167.0\r",
@@ -530,20 +530,20 @@
530530
"Episode#:1 ep_reward:-158.0\r",
531531
"Episode#:1 ep_reward:-159.0\r",
532532
"Episode#:1 ep_reward:-160.0\r",
533-
"Episode#:1 ep_reward:-161.0\r",
534-
"Episode#:1 ep_reward:-162.0\r",
535-
"Episode#:1 ep_reward:-163.0\r",
536-
"Episode#:1 ep_reward:-164.0\r",
537-
"Episode#:1 ep_reward:-165.0\r",
538-
"Episode#:1 ep_reward:-166.0\r",
539-
"Episode#:1 ep_reward:-167.0\r",
540-
"Episode#:1 ep_reward:-168.0\r"
533+
"Episode#:1 ep_reward:-161.0\r"
541534
]
542535
},
543536
{
544537
"name": "stdout",
545538
"output_type": "stream",
546539
"text": [
540+
"Episode#:1 ep_reward:-162.0\r",
541+
"Episode#:1 ep_reward:-163.0\r",
542+
"Episode#:1 ep_reward:-164.0\r",
543+
"Episode#:1 ep_reward:-165.0\r",
544+
"Episode#:1 ep_reward:-166.0\r",
545+
"Episode#:1 ep_reward:-167.0\r",
546+
"Episode#:1 ep_reward:-168.0\r",
547547
"Episode#:1 ep_reward:-169.0\r",
548548
"Episode#:1 ep_reward:-170.0\r",
549549
"Episode#:1 ep_reward:-171.0\r",

Chapter02/8_actor_critic_agent.ipynb

Lines changed: 21 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -162,84 +162,55 @@
162162
"Episode#:0 ep_reward:8.0\r",
163163
"Episode#:0 ep_reward:9.0\r",
164164
"Episode#:0 ep_reward:10.0\r",
165-
"Episode#:0 ep_reward:11.0\r"
165+
"Episode#:0 ep_reward:11.0\r",
166+
"Episode#:0 ep_reward:12.0\r"
166167
]
167168
},
168169
{
169170
"name": "stdout",
170171
"output_type": "stream",
171172
"text": [
172-
"Episode#:0 ep_reward:12.0\r",
173173
"Episode#:0 ep_reward:13.0\r",
174174
"Episode#:0 ep_reward:14.0\r",
175175
"Episode#:0 ep_reward:15.0\r",
176-
"Episode#:0 ep_reward:16.0\r\n",
177-
"\n",
176+
"Episode#:0 ep_reward:16.0\r",
178177
"Episode#:0 ep_reward:17.0\r",
179-
"Episode#:1 ep_reward:1.0\r",
180-
"Episode#:1 ep_reward:2.0\r"
178+
"Episode#:0 ep_reward:18.0\r",
179+
"Episode#:0 ep_reward:19.0\r",
180+
"Episode#:0 ep_reward:20.0\r"
181181
]
182182
},
183183
{
184184
"name": "stdout",
185185
"output_type": "stream",
186186
"text": [
187+
"Episode#:0 ep_reward:21.0\r",
188+
"Episode#:0 ep_reward:22.0\r",
189+
"Episode#:0 ep_reward:23.0\r",
190+
"Episode#:0 ep_reward:24.0\r\n",
191+
"\n",
192+
"Episode#:0 ep_reward:25.0\r",
193+
"Episode#:1 ep_reward:1.0\r",
194+
"Episode#:1 ep_reward:2.0\r",
187195
"Episode#:1 ep_reward:3.0\r",
188196
"Episode#:1 ep_reward:4.0\r",
189197
"Episode#:1 ep_reward:5.0\r",
190198
"Episode#:1 ep_reward:6.0\r",
191199
"Episode#:1 ep_reward:7.0\r",
192-
"Episode#:1 ep_reward:8.0\r",
193-
"Episode#:1 ep_reward:9.0\r",
194-
"Episode#:1 ep_reward:10.0\r",
195-
"Episode#:1 ep_reward:11.0\r",
196-
"Episode#:1 ep_reward:12.0\r",
197-
"Episode#:1 ep_reward:13.0\r"
198-
]
199-
},
200-
{
201-
"name": "stdout",
202-
"output_type": "stream",
203-
"text": [
204-
"Episode#:1 ep_reward:14.0\r",
205-
"Episode#:1 ep_reward:15.0\r",
206-
"Episode#:1 ep_reward:16.0\r",
207-
"Episode#:1 ep_reward:17.0\r",
208-
"Episode#:1 ep_reward:18.0\r",
209-
"Episode#:1 ep_reward:19.0\r",
210-
"Episode#:1 ep_reward:20.0\r",
211-
"Episode#:1 ep_reward:21.0\r",
212-
"Episode#:1 ep_reward:22.0\r"
213-
]
214-
},
215-
{
216-
"name": "stdout",
217-
"output_type": "stream",
218-
"text": [
219-
"Episode#:1 ep_reward:23.0\r",
220-
"Episode#:1 ep_reward:24.0\r",
221-
"Episode#:1 ep_reward:25.0\r",
222-
"Episode#:1 ep_reward:26.0\r",
223-
"Episode#:1 ep_reward:27.0\r",
224-
"Episode#:1 ep_reward:28.0\r",
225-
"Episode#:1 ep_reward:29.0\r",
226-
"Episode#:1 ep_reward:30.0\r",
227-
"Episode#:1 ep_reward:31.0\r",
228-
"Episode#:1 ep_reward:32.0\r",
229-
"Episode#:1 ep_reward:33.0\r"
200+
"Episode#:1 ep_reward:8.0\r"
230201
]
231202
},
232203
{
233204
"name": "stdout",
234205
"output_type": "stream",
235206
"text": [
236-
"Episode#:1 ep_reward:34.0\r",
237-
"Episode#:1 ep_reward:35.0\r",
238-
"Episode#:1 ep_reward:36.0\r",
239-
"Episode#:1 ep_reward:37.0\r",
240-
"Episode#:1 ep_reward:38.0\r\n",
207+
"Episode#:1 ep_reward:9.0\r",
208+
"Episode#:1 ep_reward:10.0\r",
209+
"Episode#:1 ep_reward:11.0\r",
210+
"Episode#:1 ep_reward:12.0\r",
211+
"Episode#:1 ep_reward:13.0\r\n",
241212
"\n",
242-
"Episode#:1 ep_reward:39.0\r"
213+
"Episode#:1 ep_reward:14.0\r"
243214
]
244215
}
245216
],

Chapter03/1_double_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
"name": "stdout",
7373
"output_type": "stream",
7474
"text": [
75-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20221216-040903\n"
75+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20230130-100446\n"
7676
]
7777
}
7878
],
@@ -216,14 +216,14 @@
216216
"name": "stdout",
217217
"output_type": "stream",
218218
"text": [
219-
"Episode#0 Reward:14.0\n"
219+
"Episode#0 Reward:11.0\n"
220220
]
221221
},
222222
{
223223
"name": "stdout",
224224
"output_type": "stream",
225225
"text": [
226-
"Episode#1 Reward:32.0\n"
226+
"Episode#1 Reward:14.0\n"
227227
]
228228
}
229229
],

Chapter03/1_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20221216-041406\n"
83+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20230130-100435\n"
8484
]
8585
}
8686
],
@@ -221,14 +221,14 @@
221221
"name": "stdout",
222222
"output_type": "stream",
223223
"text": [
224-
"Episode#0 Reward:22.0\n"
224+
"Episode#0 Reward:25.0\n"
225225
]
226226
},
227227
{
228228
"name": "stdout",
229229
"output_type": "stream",
230230
"text": [
231-
"Episode#1 Reward:26.0\n"
231+
"Episode#1 Reward:18.0\n"
232232
]
233233
}
234234
],

Chapter03/2_dueling_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDQN/CartPole-v0/20221216-041353\n"
83+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDQN/CartPole-v0/20230130-100451\n"
8484
]
8585
}
8686
],
@@ -225,14 +225,14 @@
225225
"name": "stdout",
226226
"output_type": "stream",
227227
"text": [
228-
"Episode#0 Reward:35.0\n"
228+
"Episode#0 Reward:25.0\n"
229229
]
230230
},
231231
{
232232
"name": "stdout",
233233
"output_type": "stream",
234234
"text": [
235-
"Episode#1 Reward:37.0\n"
235+
"Episode#1 Reward:32.0\n"
236236
]
237237
}
238238
],

Chapter03/3_dueling_double_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDoubleDQN/CartPole-v0/20221216-041400\n"
83+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDoubleDQN/CartPole-v0/20230130-100456\n"
8484
]
8585
}
8686
],
@@ -223,14 +223,14 @@
223223
"name": "stdout",
224224
"output_type": "stream",
225225
"text": [
226-
"Episode#0 Reward:9.0\n"
226+
"Episode#0 Reward:8.0\n"
227227
]
228228
},
229229
{
230230
"name": "stdout",
231231
"output_type": "stream",
232232
"text": [
233-
"Episode#1 Reward:61.0\n"
233+
"Episode#1 Reward:37.0\n"
234234
]
235235
}
236236
],

Chapter03/4_drqn.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
"name": "stdout",
8282
"output_type": "stream",
8383
"text": [
84-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DRQN/CartPole-v0/20221216-040909\n"
84+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DRQN/CartPole-v0/20230130-100440\n"
8585
]
8686
}
8787
],
@@ -245,7 +245,7 @@
245245
"name": "stdout",
246246
"output_type": "stream",
247247
"text": [
248-
"Episode#1 Reward:12.0\n"
248+
"Episode#1 Reward:13.0\n"
249249
]
250250
}
251251
],

Chapter03/5_a3c_continuous.ipynb

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
"name": "stdout",
7979
"output_type": "stream",
8080
"text": [
81-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-A3C/MountainCarContinuous-v0/20221216-040914\n"
81+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-A3C/MountainCarContinuous-v0/20230130-100501\n"
8282
]
8383
}
8484
],
@@ -335,30 +335,42 @@
335335
"name": "stdout",
336336
"output_type": "stream",
337337
"text": [
338-
"Episode#0 Reward:-59.33250581328509\n"
338+
"Episode#0 Reward:63.22839616711149\n"
339339
]
340340
},
341341
{
342342
"name": "stdout",
343343
"output_type": "stream",
344344
"text": [
345-
"Episode#1 Reward:-58.86206254933267\n",
346-
"Episode#2 Reward:-58.02749257868699\n"
345+
"Episode#1 Reward:-58.2003516173585\n"
347346
]
348347
},
349348
{
350349
"name": "stdout",
351350
"output_type": "stream",
352351
"text": [
353-
"Episode#3 Reward:-56.900926962313946\n"
352+
"Episode#2 Reward:-59.64766619678759\n"
354353
]
355354
},
356355
{
357356
"name": "stdout",
358357
"output_type": "stream",
359358
"text": [
360-
"Episode#4 Reward:-70.24642057572514\n",
361-
"Episode#5 Reward:-71.44694806228125\n"
359+
"Episode#3 Reward:-59.39719396371903\n"
360+
]
361+
},
362+
{
363+
"name": "stdout",
364+
"output_type": "stream",
365+
"text": [
366+
"Episode#4 Reward:-72.61610262283595\n"
367+
]
368+
},
369+
{
370+
"name": "stdout",
371+
"output_type": "stream",
372+
"text": [
373+
"Episode#5 Reward:40.24396887502007\n"
362374
]
363375
}
364376
],

Chapter03/6_ppo_continuous.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-PPO/Pendulum-v0/20221216-041316\n"
83+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-PPO/Pendulum-v0/20230130-100857\n"
8484
]
8585
}
8686
],
@@ -310,14 +310,14 @@
310310
"name": "stdout",
311311
"output_type": "stream",
312312
"text": [
313-
"Episode#0 Reward:-1028.2436740177866\n"
313+
"Episode#0 Reward:-1285.2097091154558\n"
314314
]
315315
},
316316
{
317317
"name": "stdout",
318318
"output_type": "stream",
319319
"text": [
320-
"Episode#1 Reward:-973.3971378764863\n"
320+
"Episode#1 Reward:-1227.41353179164\n"
321321
]
322322
}
323323
],

Chapter03/7_ddpg.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
"name": "stdout",
8080
"output_type": "stream",
8181
"text": [
82-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DDPG/Pendulum-v0/20221216-041338\n"
82+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DDPG/Pendulum-v0/20230130-100843\n"
8383
]
8484
}
8585
],
@@ -318,14 +318,14 @@
318318
"name": "stdout",
319319
"output_type": "stream",
320320
"text": [
321-
"Episode#0 Reward:-1496.6334247945872\n"
321+
"Episode#0 Reward:-1488.0215868208625\n"
322322
]
323323
},
324324
{
325325
"name": "stdout",
326326
"output_type": "stream",
327327
"text": [
328-
"Episode#1 Reward:-1550.0184041837888\n"
328+
"Episode#1 Reward:-1633.9064103119488\n"
329329
]
330330
}
331331
],

0 commit comments

Comments
 (0)