split up ch14
rasbt committed Oct 30, 2019
1 parent 0f8305b commit 7ae9bc7
Showing 7 changed files with 14,819 additions and 2,874 deletions.
2,874 changes: 0 additions & 2,874 deletions ch14/ch14-notebook.ipynb

This file was deleted.

1,553 changes: 1,553 additions & 0 deletions ch14/ch14_part1.ipynb

Large diffs are not rendered by default.

774 changes: 774 additions & 0 deletions ch14/ch14_part1.py

Large diffs are not rendered by default.

2,434 changes: 2,434 additions & 0 deletions ch14/ch14_part2.ipynb

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions ch14/ch14_part2.py
@@ -0,0 +1,243 @@
# coding: utf-8


import numpy as np
import tensorflow as tf
import pandas as pd
import sklearn
import sklearn.model_selection

# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)

# # Chapter 14: Going Deeper -- the Mechanics of TensorFlow (Part 2/3)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to help make the code reproducible by recording the package versions used. You can just skip the following line(s).









# ## TensorFlow Estimators
#
# ##### Steps for using pre-made estimators
#
# * **Step 1:** Define the input function for importing the data
# * **Step 2:** Define the feature columns to bridge between the estimator and the data
# * **Step 3:** Instantiate an estimator or convert a Keras model to an estimator
# * **Step 4:** Use the estimator: `train()`, `evaluate()`, and `predict()` (see the minimal skeleton below)
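
# A minimal end-to-end sketch of these four steps; the tiny in-memory
# dataset and all `sketch_*` identifiers are hypothetical, and the rest
# of this notebook carries the same steps out on the Auto MPG dataset:

def sketch_input_fn():
    ## Step 1: a (features-dict, labels) dataset, batched
    return tf.data.Dataset.from_tensor_slices(
        ({'x': [1., 2., 3., 4.]}, [1.5, 2.5, 3.5, 4.5])).batch(2)

## Step 2: one numeric feature column describing the input
sketch_feature_columns = [tf.feature_column.numeric_column(key='x')]

## Step 3: instantiate a pre-made estimator
sketch_regressor = tf.estimator.LinearRegressor(
    feature_columns=sketch_feature_columns)

## Step 4: train and evaluate
sketch_regressor.train(input_fn=sketch_input_fn, steps=10)
print(sketch_regressor.evaluate(input_fn=sketch_input_fn))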



tf.random.set_seed(1)
np.random.seed(1)


# ### Working with feature columns
#
#
# * See definition: https://developers.google.com/machine-learning/glossary/#feature_columns
# * Documentation: https://www.tensorflow.org/api_docs/python/tf/feature_column



dataset_path = tf.keras.utils.get_file("auto-mpg.data",
                                       ("http://archive.ics.uci.edu/ml/machine-learning-databases"
                                        "/auto-mpg/auto-mpg.data"))

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower',
                'Weight', 'Acceleration', 'ModelYear', 'Origin']

df = pd.read_csv(dataset_path, names=column_names,
                 na_values="?", comment='\t',
                 sep=" ", skipinitialspace=True)

df.tail()




print(df.isna().sum())

df = df.dropna()
df = df.reset_index(drop=True)
df.tail()






df_train, df_test = sklearn.model_selection.train_test_split(df, train_size=0.8)
train_stats = df_train.describe().transpose()
train_stats




numeric_column_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration']

df_train_norm, df_test_norm = df_train.copy(), df_test.copy()

for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']
    df_train_norm.loc[:, col_name] = (df_train_norm.loc[:, col_name] - mean) / std
    df_test_norm.loc[:, col_name] = (df_test_norm.loc[:, col_name] - mean) / std

df_train_norm.tail()


# #### Numeric Columns



numeric_features = []

for col_name in numeric_column_names:
    numeric_features.append(tf.feature_column.numeric_column(key=col_name))

numeric_features




feature_year = tf.feature_column.numeric_column(key="ModelYear")

bucketized_features = []

bucketized_features.append(tf.feature_column.bucketized_column(
    source_column=feature_year,
    boundaries=[73, 76, 79]))

print(bucketized_features)
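
# The three boundaries define four buckets: (-inf, 73), [73, 76), [76, 79),
# and [79, inf). As an optional sanity check (assuming TensorFlow 2.x),
# tf.keras.layers.DenseFeatures can apply the transformation to a raw batch
# and show the resulting one-hot bucket indicators:

demo_years = {'ModelYear': tf.constant([70., 74., 77., 81.])}
print(tf.keras.layers.DenseFeatures(bucketized_features)(demo_years))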




feature_origin = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Origin',
    vocabulary_list=[1, 2, 3])

categorical_indicator_features = []
categorical_indicator_features.append(tf.feature_column.indicator_column(feature_origin))

print(categorical_indicator_features)
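
# indicator_column one-hot encodes the three Origin categories. An optional
# check of the encoding, again via DenseFeatures (assuming TensorFlow 2.x):

demo_origins = {'Origin': tf.constant([3, 1, 2])}
print(tf.keras.layers.DenseFeatures(categorical_indicator_features)(demo_origins))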


# ### Machine learning with pre-made Estimators



def train_input_fn(df_train, batch_size=8):
    df = df_train.copy()
    train_x, train_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))

    # shuffle, repeat, and batch the examples
    return dataset.shuffle(1000).repeat().batch(batch_size)

## inspection
ds = train_input_fn(df_train_norm)
batch = next(iter(ds))
print('Keys:', batch[0].keys())
print('Batch Model Years:', batch[0]['ModelYear'])




all_feature_columns = (numeric_features +
                       bucketized_features +
                       categorical_indicator_features)

print(all_feature_columns)




regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    model_dir='models/autompg-dnnregressor/')




EPOCHS = 1000
BATCH_SIZE = 8
total_steps = EPOCHS * int(np.ceil(len(df_train) / BATCH_SIZE))
print('Training Steps:', total_steps)

regressor.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE),
    steps=total_steps)




## Create a new estimator that warm-starts from the checkpoints
## saved in the previous run's model_dir
reloaded_regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    warm_start_from='models/autompg-dnnregressor/',
    model_dir='models/autompg-dnnregressor/')




def eval_input_fn(df_test, batch_size=8):
    df = df_test.copy()
    test_x, test_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(test_x), test_y))

    return dataset.batch(batch_size)

eval_results = reloaded_regressor.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

for key in eval_results:
    print('{:15s} {}'.format(key, eval_results[key]))

print('Average-Loss {:.4f}'.format(eval_results['average_loss']))




pred_res = regressor.predict(input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(next(iter(pred_res)))
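
# Each element yielded by predict() is a dict whose 'predictions' entry
# holds the predicted MPG value, so all test-set predictions can be
# collected into a flat NumPy array (a short optional sketch):

predicted_mpg = np.array(
    [pred['predictions'][0]
     for pred in regressor.predict(
         input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))])
print(predicted_mpg[:5])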


# #### Boosted Tree Regressor



## n_batches_per_layer sets how many batches are used to collect the
## statistics for growing each layer of the trees
boosted_tree = tf.estimator.BoostedTreesRegressor(
    feature_columns=all_feature_columns,
    n_batches_per_layer=20,
    n_trees=200)

boosted_tree.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE))

eval_results = boosted_tree.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(eval_results)

print('Average-Loss {:.4f}'.format(eval_results['average_loss']))


# ---
#
# Readers may ignore the next cell.




9,612 changes: 9,612 additions & 0 deletions ch14/ch14_part3.ipynb

Large diffs are not rendered by default.

203 changes: 203 additions & 0 deletions ch14/ch14_part3.py
@@ -0,0 +1,203 @@
# coding: utf-8


import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)

# # Chapter 14: Going Deeper -- the Mechanics of TensorFlow (Part 3/3)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to help make the code reproducible by recording the package versions used. You can just skip the following line(s).









# ### Using Estimators for MNIST hand-written digit classification



BUFFER_SIZE = 10000
BATCH_SIZE = 64
NUM_EPOCHS = 20
steps_per_epoch = np.ceil(60000 / BATCH_SIZE)  # the MNIST training set has 60,000 images




def preprocess(item):
    image = item['image']
    label = item['label']
    ## convert_image_dtype also scales the uint8 pixels to the [0, 1] range
    image = tf.image.convert_image_dtype(
        image, tf.float32)
    image = tf.reshape(image, (-1,))  ## flatten each image to a 784-dim vector

    return {'image-pixels': image}, label[..., tf.newaxis]

## Step 1: Define the input functions (one for training, one for evaluation)
def train_input_fn():
    datasets = tfds.load(name='mnist')
    mnist_train = datasets['train']

    dataset = mnist_train.map(preprocess)
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset.repeat()

## Define the input function for evaluation:
def eval_input_fn():
    datasets = tfds.load(name='mnist')
    mnist_test = datasets['test']
    dataset = mnist_test.map(preprocess).batch(BATCH_SIZE)
    return dataset
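
# An optional sanity check (this triggers the MNIST download via
# tensorflow_datasets): one batch from the evaluation input should hold
# 64 flattened 784-pixel images and a (64, 1) column of labels.

batch = next(iter(eval_input_fn()))
print('Image batch shape:', batch[0]['image-pixels'].shape)
print('Label batch shape:', batch[1].shape)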




## Step 2: feature column; shape=(28*28) matches the flattened
## 784-pixel vectors produced by preprocess()
image_feature_column = tf.feature_column.numeric_column(
    key='image-pixels', shape=(28*28))




## Step 3: instantiate the estimator
dnn_classifier = tf.estimator.DNNClassifier(
    feature_columns=[image_feature_column],
    hidden_units=[32, 16],
    n_classes=10,
    model_dir='models/mnist-dnn/')


## Step 4: train
dnn_classifier.train(
    input_fn=train_input_fn,
    steps=NUM_EPOCHS * steps_per_epoch)




eval_result = dnn_classifier.evaluate(
    input_fn=eval_input_fn)

print(eval_result)


# ### Creating a custom Estimator from an existing Keras model



## Set random seeds for reproducibility
tf.random.set_seed(1)
np.random.seed(1)

## Create the data: an XOR-style problem where the label is 1 if
## x0 and x1 have the same sign, and 0 otherwise
x = np.random.uniform(low=-1, high=1, size=(200, 2))
y = np.ones(len(x))
y[x[:, 0] * x[:, 1] < 0] = 0

x_train = x[:100, :]
y_train = y[:100]
x_valid = x[100:, :]
y_valid = y[100:]




## Step 1: Define the input functions
def train_input_fn(x_train, y_train, batch_size=8):
    dataset = tf.data.Dataset.from_tensor_slices(
        ({'input-features': x_train}, y_train.reshape(-1, 1)))

    # Shuffle, repeat, and batch the examples.
    return dataset.shuffle(100).repeat().batch(batch_size)

def eval_input_fn(x_test, y_test=None, batch_size=8):
    if y_test is None:
        dataset = tf.data.Dataset.from_tensor_slices(
            {'input-features': x_test})
    else:
        dataset = tf.data.Dataset.from_tensor_slices(
            ({'input-features': x_test}, y_test.reshape(-1, 1)))

    # Batch the examples (no shuffling or repeating for evaluation).
    return dataset.batch(batch_size)




## Step 2: Define the feature columns
## the key must match the 'input-features' dict key used in the input functions
features = [
    tf.feature_column.numeric_column(
        key='input-features', shape=(2,))
]

features




## Step 3: Create the estimator: convert from a Keras model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(2,), name='input-features'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

model.compile(optimizer=tf.keras.optimizers.SGD(),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()])

## model_to_estimator reuses the compiled model's optimizer, loss, and
## metrics; checkpoints are written to model_dir
my_estimator = tf.keras.estimator.model_to_estimator(
    keras_model=model,
    model_dir='models/estimator-for-XOR/')




## Step 4: use the estimator: train/evaluate/predict

num_epochs = 200
batch_size = 2
steps_per_epoch = np.ceil(len(x_train) / batch_size)

my_estimator.train(
    input_fn=lambda: train_input_fn(x_train, y_train, batch_size),
    steps=num_epochs * steps_per_epoch)




my_estimator.evaluate(
    input_fn=lambda: eval_input_fn(x_valid, y_valid, batch_size))
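
# Because eval_input_fn builds a features-only dataset when y_test is None,
# the same function can feed predict() as well -- a short optional sketch:

pred_res = my_estimator.predict(
    input_fn=lambda: eval_input_fn(x_valid, batch_size=batch_size))
print(next(iter(pred_res)))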


# ...

# # Summary

# ...

# ---
#
# Readers may ignore the next cell.



