# coding: utf-8

import numpy as np
import tensorflow as tf
import pandas as pd
import sklearn
import sklearn.model_selection

# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)


# # Chapter 14: Going Deeper -- the Mechanics of TensorFlow (Part 2/3)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).


# ## TensorFlow Estimators
#
# ##### Steps for using pre-made estimators
#
# * **Step 1:** Define the input function for importing the data
# * **Step 2:** Define the feature columns to bridge between the estimator and the data
# * **Step 3:** Instantiate an estimator or convert a Keras model to an estimator
# * **Step 4:** Use the estimator: train(), evaluate(), and predict()


tf.random.set_seed(1)
np.random.seed(1)


# ### Working with feature columns
#
# * See definition: https://developers.google.com/machine-learning/glossary/#feature_columns
# * Documentation: https://www.tensorflow.org/api_docs/python/tf/feature_column


dataset_path = tf.keras.utils.get_file("auto-mpg.data",
                                       ("http://archive.ics.uci.edu/ml/machine-learning-databases"
                                        "/auto-mpg/auto-mpg.data"))

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower',
                'Weight', 'Acceleration', 'ModelYear', 'Origin']

df = pd.read_csv(dataset_path, names=column_names,
                 na_values="?", comment='\t',
                 sep=" ", skipinitialspace=True)

df.tail()


## check for missing values, then drop the incomplete rows
print(df.isna().sum())

df = df.dropna()
df = df.reset_index(drop=True)
df.tail()


## train/test split
df_train, df_test = sklearn.model_selection.train_test_split(df, train_size=0.8)
train_stats = df_train.describe().transpose()
train_stats


## standardize the numeric columns using the training-set statistics
numeric_column_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration']

df_train_norm, df_test_norm = df_train.copy(), df_test.copy()

for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']
    df_train_norm.loc[:, col_name] = (df_train_norm.loc[:, col_name] - mean) / std
    df_test_norm.loc[:, col_name] = (df_test_norm.loc[:, col_name] - mean) / std

df_train_norm.tail()


# #### Numeric Columns

numeric_features = []

for col_name in numeric_column_names:
    numeric_features.append(tf.feature_column.numeric_column(key=col_name))

numeric_features
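

# As a quick aside (a sketch added here, not from the book's code), feature
# columns can be applied to a batch of raw features with
# `tf.keras.layers.DenseFeatures` to inspect the transformation they perform:
example_batch = {name: tf.constant([[1.0]]) for name in numeric_column_names}
print(tf.keras.layers.DenseFeatures(numeric_features)(example_batch))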


# #### Bucketized Columns

feature_year = tf.feature_column.numeric_column(key="ModelYear")

bucketized_features = []

bucketized_features.append(tf.feature_column.bucketized_column(
    source_column=feature_year,
    boundaries=[73, 76, 79]))

print(bucketized_features)
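

# A small sanity check (an addition, not part of the original notebook): with
# boundaries [73, 76, 79] there are four buckets, and `DenseFeatures` renders
# each ModelYear as a one-hot vector over those buckets:
demo_years = {'ModelYear': tf.constant([[70.0], [75.0], [82.0]])}
print(tf.keras.layers.DenseFeatures(bucketized_features)(demo_years))
# 70 -> bucket 0 (< 73), 75 -> bucket 1 ([73, 76)), 82 -> bucket 3 (>= 79)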


# #### Categorical Indicator Columns

feature_origin = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Origin',
    vocabulary_list=[1, 2, 3])

categorical_indicator_features = []
categorical_indicator_features.append(tf.feature_column.indicator_column(feature_origin))

print(categorical_indicator_features)
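

# The analogous check for the indicator column (again an added sketch): each
# of the three Origin categories maps to a one-hot vector.
demo_origin = {'Origin': tf.constant([[1], [2], [3]], dtype=tf.int64)}
print(tf.keras.layers.DenseFeatures(categorical_indicator_features)(demo_origin))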


# ### Machine learning with pre-made Estimators


## Step 1: define the input function for training
def train_input_fn(df_train, batch_size=8):
    df = df_train.copy()
    train_x, train_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))

    # shuffle, repeat, and batch the examples
    return dataset.shuffle(1000).repeat().batch(batch_size)

## inspection
ds = train_input_fn(df_train_norm)
batch = next(iter(ds))
print('Keys:', batch[0].keys())
print('Batch Model Years:', batch[0]['ModelYear'])


## Step 2: combine the feature columns defined above
all_feature_columns = (numeric_features +
                       bucketized_features +
                       categorical_indicator_features)

print(all_feature_columns)


## Step 3: instantiate the estimator
regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    model_dir='models/autompg-dnnregressor/')


## Step 4: train the estimator
EPOCHS = 1000
BATCH_SIZE = 8
total_steps = EPOCHS * int(np.ceil(len(df_train) / BATCH_SIZE))
print('Training Steps:', total_steps)

regressor.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE),
    steps=total_steps)


## reload the trained model from its checkpoint directory
reloaded_regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    warm_start_from='models/autompg-dnnregressor/',
    model_dir='models/autompg-dnnregressor/')


## input function for evaluation (batches only; no shuffling or repeating)
def eval_input_fn(df_test, batch_size=8):
    df = df_test.copy()
    test_x, test_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(test_x), test_y))

    return dataset.batch(batch_size)

eval_results = reloaded_regressor.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

for key in eval_results:
    print('{:15s} {}'.format(key, eval_results[key]))

print('Average-Loss {:.4f}'.format(eval_results['average_loss']))


## predict on the test set and inspect the first prediction
pred_res = regressor.predict(input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(next(iter(pred_res)))
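

# Going one step further (an added sketch, not part of the original notebook):
# each element yielded by predict() is a dict whose 'predictions' entry holds
# the predicted MPG, so the test-set mean absolute error can be computed by hand.
y_pred = np.array([p['predictions'][0] for p in regressor.predict(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))])
y_true = df_test_norm['MPG'].values
print('Test MAE: {:.4f}'.format(np.mean(np.abs(y_true - y_pred))))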


# #### Boosted Tree Regressor

boosted_tree = tf.estimator.BoostedTreesRegressor(
    feature_columns=all_feature_columns,
    n_batches_per_layer=20,
    n_trees=200)

## no explicit `steps` argument here: BoostedTrees training stops on its own
## once the ensemble has grown to n_trees trees
boosted_tree.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE))

eval_results = boosted_tree.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(eval_results)

print('Average-Loss {:.4f}'.format(eval_results['average_loss']))


# ---
#
# Readers may ignore the next cell.
# coding: utf-8

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)


# # Chapter 14: Going Deeper -- the Mechanics of TensorFlow (Part 3/3)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).


# ### Using Estimators for MNIST hand-written digit classification

BUFFER_SIZE = 10000
BATCH_SIZE = 64
NUM_EPOCHS = 20
steps_per_epoch = int(np.ceil(60000 / BATCH_SIZE))  # MNIST has 60,000 training images


## flatten each 28x28 image into a 784-dimensional feature vector
def preprocess(item):
    image = item['image']
    label = item['label']
    image = tf.image.convert_image_dtype(
        image, tf.float32)
    image = tf.reshape(image, (-1,))

    return {'image-pixels': image}, label[..., tf.newaxis]
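

# A quick shape check (a sketch added here, assuming a record with the same
# structure as a tfds MNIST item): `preprocess` should map a 28x28x1 image to
# a flat 784-dimensional vector and give the label an extra axis.
dummy_item = {'image': tf.zeros((28, 28, 1), dtype=tf.uint8),
              'label': tf.constant(5, dtype=tf.int64)}
features, label = preprocess(dummy_item)
print(features['image-pixels'].shape)  # (784,)
print(label.shape)                     # (1,)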


## Step 1: Define the input functions (one for training, one for evaluation)
def train_input_fn():
    datasets = tfds.load(name='mnist')
    mnist_train = datasets['train']

    dataset = mnist_train.map(preprocess)
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset.repeat()

## input function for evaluation
def eval_input_fn():
    datasets = tfds.load(name='mnist')
    mnist_test = datasets['test']
    dataset = mnist_test.map(preprocess).batch(BATCH_SIZE)
    return dataset
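

# Optional inspection (an added sketch; assumes the tfds MNIST download works):
# pull one batch from the evaluation pipeline and verify its shapes.
features_batch, label_batch = next(iter(eval_input_fn()))
print(features_batch['image-pixels'].shape)  # (64, 784)
print(label_batch.shape)                     # (64, 1)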


## Step 2: feature column
image_feature_column = tf.feature_column.numeric_column(
    key='image-pixels', shape=(28*28))


## Step 3: instantiate the estimator
dnn_classifier = tf.estimator.DNNClassifier(
    feature_columns=[image_feature_column],
    hidden_units=[32, 16],
    n_classes=10,
    model_dir='models/mnist-dnn/')


## Step 4: train
dnn_classifier.train(
    input_fn=train_input_fn,
    steps=NUM_EPOCHS * steps_per_epoch)


eval_result = dnn_classifier.evaluate(
    input_fn=eval_input_fn)

print(eval_result)
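

# To round out Step 4 (an added sketch; the original cell stops at
# evaluate()): each element yielded by predict() is a dict, and its
# 'class_ids' entry holds the predicted digit.
predictions = dnn_classifier.predict(input_fn=eval_input_fn)
first_pred = next(iter(predictions))
print('Predicted digit:', int(first_pred['class_ids'][0]))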


# ### Creating a custom Estimator from an existing Keras model


## Set random seeds for reproducibility
tf.random.set_seed(1)
np.random.seed(1)

## Create the data: a noiseless XOR dataset -- class 0 when the two
## features have opposite signs, class 1 otherwise
x = np.random.uniform(low=-1, high=1, size=(200, 2))
y = np.ones(len(x))
y[x[:, 0] * x[:, 1] < 0] = 0

x_train = x[:100, :]
y_train = y[:100]
x_valid = x[100:, :]
y_valid = y[100:]


## Step 1: Define the input functions
def train_input_fn(x_train, y_train, batch_size=8):
    dataset = tf.data.Dataset.from_tensor_slices(
        ({'input-features': x_train}, y_train.reshape(-1, 1)))

    # Shuffle, repeat, and batch the examples.
    return dataset.shuffle(100).repeat().batch(batch_size)

def eval_input_fn(x_test, y_test=None, batch_size=8):
    if y_test is None:
        dataset = tf.data.Dataset.from_tensor_slices(
            {'input-features': x_test})
    else:
        dataset = tf.data.Dataset.from_tensor_slices(
            ({'input-features': x_test}, y_test.reshape(-1, 1)))

    # Batch the examples (no shuffling or repeating for evaluation).
    return dataset.batch(batch_size)


## Step 2: Define the feature columns (here, the key matches the name of
## the Keras model's input layer, 'input-features')
features = [
    tf.feature_column.numeric_column(
        key='input-features', shape=(2,))
]

features


## Step 3: Create the estimator: convert from a Keras model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(2,), name='input-features'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

model.compile(optimizer=tf.keras.optimizers.SGD(),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()])

my_estimator = tf.keras.estimator.model_to_estimator(
    keras_model=model,
    model_dir='models/estimator-for-XOR/')


## Step 4: use the estimator: train/evaluate/predict

num_epochs = 200
batch_size = 2
steps_per_epoch = int(np.ceil(len(x_train) / batch_size))

my_estimator.train(
    input_fn=lambda: train_input_fn(x_train, y_train, batch_size),
    steps=num_epochs * steps_per_epoch)


my_estimator.evaluate(
    input_fn=lambda: eval_input_fn(x_valid, y_valid, batch_size))
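

# The predict() part of Step 4 is not shown above; a minimal sketch (an
# addition -- the exact keys of the returned dicts depend on the names Keras
# assigns to the output layer):
pred = my_estimator.predict(
    input_fn=lambda: eval_input_fn(x_valid, batch_size=8))
print(next(iter(pred)))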


# ...

# # Summary

# ...

# ---
#
# Readers may ignore the next cell.