From 13369d907824ff299cefe4085c39eba1e4572f86 Mon Sep 17 00:00:00 2001
From: rasbt
Date: Sun, 3 Nov 2019 17:33:28 -0600
Subject: [PATCH] ch18 fixes

---
 ch18/cartpole/main.py | 52 +++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/ch18/cartpole/main.py b/ch18/cartpole/main.py
index 56a288be..ada14e7a 100644
--- a/ch18/cartpole/main.py
+++ b/ch18/cartpole/main.py
@@ -1,7 +1,24 @@
+# coding: utf-8
+
+# Python Machine Learning 3rd Edition by
+# Sebastian Raschka (https://sebastianraschka.com) & Vahid Mirjalili (http://vahidmirjalili.com)
+# Packt Publishing Ltd. 2019
+#
+# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
+#
+# Code License: MIT License (https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)
+
+############################################################################
+# Chapter 18: Reinforcement Learning
+############################################################################
+
+# Script: cartpole/main.py
+
 import gym
 import numpy as np
 import tensorflow as tf
 import random
+import matplotlib.pyplot as plt
 from collections import namedtuple
 from collections import deque
 
@@ -35,18 +52,18 @@ def __init__(
     def _build_nn_model(self, n_layers=3):
         self.model = tf.keras.Sequential()
 
-        ## Hidden layers
+        # Hidden layers
         for n in range(n_layers - 1):
             self.model.add(tf.keras.layers.Dense(
                 units=32, activation='relu'))
             self.model.add(tf.keras.layers.Dense(
                 units=32, activation='relu'))
 
-        ## Last layer
+        # Last layer
         self.model.add(tf.keras.layers.Dense(
             units=self.action_size))
 
-        ## Build & compile model
+        # Build & compile model
         self.model.build(input_shape=(None, self.state_size))
         self.model.compile(
             loss='mse',
@@ -71,7 +88,7 @@ def _learn(self, batch_samples):
                 target = (r +
                           self.gamma * np.amax(
                               self.model.predict(next_s)[0]
-                               )
+                          )
                 )
                 target_all = self.model.predict(s)[0]
                 target_all[a] = target
@@ -92,19 +109,20 @@ def replay(self, batch_size):
         history = self._learn(samples)
         return history.history['loss'][0]
 
-    def plot_learning_history(history):
-        fig = plt.figure(1, figsize=(14, 5))
-        ax = fig.add_subplot(1, 1, 1)
-        episodes = np.arange(len(history[0])) + 1
-        plt.plot(episodes, history[0], lw=4,
-                 marker='o', markersize=10)
-        ax.tick_params(axis='both', which='major', labelsize=15)
-        plt.xlabel('Episodes', size=20)
-        plt.ylabel('# Total Rewards', size=20)
-        plt.show()
+
+def plot_learning_history(history):
+    fig = plt.figure(1, figsize=(14, 5))
+    ax = fig.add_subplot(1, 1, 1)
+    episodes = np.arange(len(history[0])) + 1
+    plt.plot(episodes, history[0], lw=4,
+             marker='o', markersize=10)
+    ax.tick_params(axis='both', which='major', labelsize=15)
+    plt.xlabel('Episodes', size=20)
+    plt.ylabel('# Total Rewards', size=20)
+    plt.show()
 
 
-## General settings
+# General settings
 EPISODES = 200
 batch_size = 32
 init_replay_memory_size = 500
@@ -115,7 +133,7 @@ def plot_learning_history(history):
     state = env.reset()
     state = np.reshape(state, [1, agent.state_size])
 
-    ## Filling up the replay-memory
+    # Filling up the replay-memory
     for i in range(init_replay_memory_size):
         action = agent.choose_action(state)
         next_state, reward, done, _ = env.step(action)
@@ -151,4 +169,4 @@ def plot_learning_history(history):
                 break
             loss = agent.replay(batch_size)
             losses.append(loss)
-    plot_learning_history((total_rewards, losses))
+    plot_learning_history(total_rewards)
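
The target assembled in _learn above is the standard Q-learning bootstrap, target = r + gamma * max_a Q(s', a). The following standalone sketch (toy numbers only, independent of the patch and of the agent's Keras model) illustrates that single line with plain NumPy:

# Standalone sketch of the Q-learning target used in _learn.
# All values below are made up for illustration; they are not taken from the script.
import numpy as np

gamma = 0.95                    # example discount factor
r = 1.0                         # reward observed for the current transition
q_next = np.array([0.2, 0.7])   # assumed Q-value estimates for the next state, one entry per action

# Non-terminal transition: bootstrap from the best next-state action value.
target = r + gamma * np.amax(q_next)
print(target)                   # approximately 1.665

# Terminal transition: no bootstrapping, the target is just the reward.
target_terminal = r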