# coding: utf-8

import numpy as np
import tensorflow as tf
import pandas as pd
import sklearn
import sklearn.model_selection

# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)


# # Chapter 14: Going Deeper -- the Mechanics of TensorFlow (Part 2/3)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).


# ## TensorFlow Estimators
#
# ##### Steps for using pre-made estimators
#
# * **Step 1:** Define the input function for importing the data
# * **Step 2:** Define the feature columns to bridge between the estimator and the data
# * **Step 3:** Instantiate an estimator or convert a Keras model to an estimator
# * **Step 4:** Use the estimator: train(), evaluate(), and predict()


tf.random.set_seed(1)
np.random.seed(1)


# ### Working with feature columns
#
# * See definition: https://developers.google.com/machine-learning/glossary/#feature_columns
# * Documentation: https://www.tensorflow.org/api_docs/python/tf/feature_column


dataset_path = tf.keras.utils.get_file("auto-mpg.data",
                                       ("http://archive.ics.uci.edu/ml/machine-learning-databases"
                                        "/auto-mpg/auto-mpg.data"))

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower',
                'Weight', 'Acceleration', 'ModelYear', 'Origin']

df = pd.read_csv(dataset_path, names=column_names,
                 na_values="?", comment='\t',
                 sep=" ", skipinitialspace=True)

df.tail()


## check for missing values, then drop the incomplete rows
print(df.isna().sum())

df = df.dropna()
df = df.reset_index(drop=True)
df.tail()


## train/test split
df_train, df_test = sklearn.model_selection.train_test_split(df, train_size=0.8)
train_stats = df_train.describe().transpose()
train_stats


## standardize the numeric columns using the training-set statistics
numeric_column_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration']

df_train_norm, df_test_norm = df_train.copy(), df_test.copy()

for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']
    df_train_norm.loc[:, col_name] = (df_train_norm.loc[:, col_name] - mean) / std
    df_test_norm.loc[:, col_name] = (df_test_norm.loc[:, col_name] - mean) / std

df_train_norm.tail()


# #### Numeric Columns

numeric_features = []

for col_name in numeric_column_names:
    numeric_features.append(tf.feature_column.numeric_column(key=col_name))

numeric_features
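

# As a quick aside (a sketch added here, not from the book's code), feature
# columns can be applied to a batch of raw features with
# `tf.keras.layers.DenseFeatures` to inspect the transformation they perform:
example_batch = {name: tf.constant([[1.0]]) for name in numeric_column_names}
print(tf.keras.layers.DenseFeatures(numeric_features)(example_batch))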


# #### Bucketized Columns

feature_year = tf.feature_column.numeric_column(key="ModelYear")

bucketized_features = []

bucketized_features.append(tf.feature_column.bucketized_column(
    source_column=feature_year,
    boundaries=[73, 76, 79]))

print(bucketized_features)
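

# A small sanity check (an addition, not part of the original notebook): with
# boundaries [73, 76, 79] there are four buckets, and `DenseFeatures` renders
# each ModelYear as a one-hot vector over those buckets:
demo_years = {'ModelYear': tf.constant([[70.0], [75.0], [82.0]])}
print(tf.keras.layers.DenseFeatures(bucketized_features)(demo_years))
# 70 -> bucket 0 (< 73), 75 -> bucket 1 ([73, 76)), 82 -> bucket 3 (>= 79)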


# #### Categorical Indicator Columns

feature_origin = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Origin',
    vocabulary_list=[1, 2, 3])

categorical_indicator_features = []
categorical_indicator_features.append(tf.feature_column.indicator_column(feature_origin))

print(categorical_indicator_features)
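

# The analogous check for the indicator column (again an added sketch): each
# of the three Origin categories maps to a one-hot vector.
demo_origin = {'Origin': tf.constant([[1], [2], [3]], dtype=tf.int64)}
print(tf.keras.layers.DenseFeatures(categorical_indicator_features)(demo_origin))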


# ### Machine learning with pre-made Estimators


## Step 1: define the input function for training
def train_input_fn(df_train, batch_size=8):
    df = df_train.copy()
    train_x, train_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))

    # shuffle, repeat, and batch the examples
    return dataset.shuffle(1000).repeat().batch(batch_size)

## inspection
ds = train_input_fn(df_train_norm)
batch = next(iter(ds))
print('Keys:', batch[0].keys())
print('Batch Model Years:', batch[0]['ModelYear'])


## Step 2: combine the feature columns defined above
all_feature_columns = (numeric_features +
                       bucketized_features +
                       categorical_indicator_features)

print(all_feature_columns)


## Step 3: instantiate the estimator
regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    model_dir='models/autompg-dnnregressor/')


## Step 4: train the estimator
EPOCHS = 1000
BATCH_SIZE = 8
total_steps = EPOCHS * int(np.ceil(len(df_train) / BATCH_SIZE))
print('Training Steps:', total_steps)

regressor.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE),
    steps=total_steps)


## reload the trained model from its checkpoint directory
reloaded_regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    warm_start_from='models/autompg-dnnregressor/',
    model_dir='models/autompg-dnnregressor/')


## input function for evaluation (batches only; no shuffling or repeating)
def eval_input_fn(df_test, batch_size=8):
    df = df_test.copy()
    test_x, test_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(test_x), test_y))

    return dataset.batch(batch_size)

eval_results = reloaded_regressor.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

for key in eval_results:
    print('{:15s} {}'.format(key, eval_results[key]))

print('Average-Loss {:.4f}'.format(eval_results['average_loss']))


## predict on the test set and inspect the first prediction
pred_res = regressor.predict(input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(next(iter(pred_res)))
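

# Going one step further (an added sketch, not part of the original notebook):
# each element yielded by predict() is a dict whose 'predictions' entry holds
# the predicted MPG, so the test-set mean absolute error can be computed by hand.
y_pred = np.array([p['predictions'][0] for p in regressor.predict(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))])
y_true = df_test_norm['MPG'].values
print('Test MAE: {:.4f}'.format(np.mean(np.abs(y_true - y_pred))))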


# #### Boosted Tree Regressor

boosted_tree = tf.estimator.BoostedTreesRegressor(
    feature_columns=all_feature_columns,
    n_batches_per_layer=20,
    n_trees=200)

## no explicit `steps` argument here: BoostedTrees training stops on its own
## once the ensemble has grown to n_trees trees
boosted_tree.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE))

eval_results = boosted_tree.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(eval_results)

print('Average-Loss {:.4f}'.format(eval_results['average_loss']))


# ---
#
# Readers may ignore the next cell.
# coding: utf-8

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)


# # Chapter 14: Going Deeper -- the Mechanics of TensorFlow (Part 3/3)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).


# ### Using Estimators for MNIST hand-written digit classification

BUFFER_SIZE = 10000
BATCH_SIZE = 64
NUM_EPOCHS = 20
steps_per_epoch = int(np.ceil(60000 / BATCH_SIZE))  # MNIST has 60,000 training images


## flatten each 28x28 image into a 784-dimensional feature vector
def preprocess(item):
    image = item['image']
    label = item['label']
    image = tf.image.convert_image_dtype(
        image, tf.float32)
    image = tf.reshape(image, (-1,))

    return {'image-pixels': image}, label[..., tf.newaxis]
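

# A quick shape check (a sketch added here, assuming a record with the same
# structure as a tfds MNIST item): `preprocess` should map a 28x28x1 image to
# a flat 784-dimensional vector and give the label an extra axis.
dummy_item = {'image': tf.zeros((28, 28, 1), dtype=tf.uint8),
              'label': tf.constant(5, dtype=tf.int64)}
features, label = preprocess(dummy_item)
print(features['image-pixels'].shape)  # (784,)
print(label.shape)                     # (1,)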


## Step 1: Define the input functions (one for training, one for evaluation)
def train_input_fn():
    datasets = tfds.load(name='mnist')
    mnist_train = datasets['train']

    dataset = mnist_train.map(preprocess)
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset.repeat()

## input function for evaluation
def eval_input_fn():
    datasets = tfds.load(name='mnist')
    mnist_test = datasets['test']
    dataset = mnist_test.map(preprocess).batch(BATCH_SIZE)
    return dataset
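

# Optional inspection (an added sketch; assumes the tfds MNIST download works):
# pull one batch from the evaluation pipeline and verify its shapes.
features_batch, label_batch = next(iter(eval_input_fn()))
print(features_batch['image-pixels'].shape)  # (64, 784)
print(label_batch.shape)                     # (64, 1)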


## Step 2: feature column
image_feature_column = tf.feature_column.numeric_column(
    key='image-pixels', shape=(28*28))


## Step 3: instantiate the estimator
dnn_classifier = tf.estimator.DNNClassifier(
    feature_columns=[image_feature_column],
    hidden_units=[32, 16],
    n_classes=10,
    model_dir='models/mnist-dnn/')


## Step 4: train
dnn_classifier.train(
    input_fn=train_input_fn,
    steps=NUM_EPOCHS * steps_per_epoch)


eval_result = dnn_classifier.evaluate(
    input_fn=eval_input_fn)

print(eval_result)
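

# To round out Step 4 (an added sketch; the original cell stops at
# evaluate()): each element yielded by predict() is a dict, and its
# 'class_ids' entry holds the predicted digit.
predictions = dnn_classifier.predict(input_fn=eval_input_fn)
first_pred = next(iter(predictions))
print('Predicted digit:', int(first_pred['class_ids'][0]))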


# ### Creating a custom Estimator from an existing Keras model


## Set random seeds for reproducibility
tf.random.set_seed(1)
np.random.seed(1)

## Create the data: a noiseless XOR dataset -- class 0 when the two
## features have opposite signs, class 1 otherwise
x = np.random.uniform(low=-1, high=1, size=(200, 2))
y = np.ones(len(x))
y[x[:, 0] * x[:, 1] < 0] = 0

x_train = x[:100, :]
y_train = y[:100]
x_valid = x[100:, :]
y_valid = y[100:]


## Step 1: Define the input functions
def train_input_fn(x_train, y_train, batch_size=8):
    dataset = tf.data.Dataset.from_tensor_slices(
        ({'input-features': x_train}, y_train.reshape(-1, 1)))

    # Shuffle, repeat, and batch the examples.
    return dataset.shuffle(100).repeat().batch(batch_size)

def eval_input_fn(x_test, y_test=None, batch_size=8):
    if y_test is None:
        dataset = tf.data.Dataset.from_tensor_slices(
            {'input-features': x_test})
    else:
        dataset = tf.data.Dataset.from_tensor_slices(
            ({'input-features': x_test}, y_test.reshape(-1, 1)))

    # Batch the examples (no shuffling or repeating for evaluation).
    return dataset.batch(batch_size)


## Step 2: Define the feature columns (here, the key matches the name of
## the Keras model's input layer, 'input-features')
features = [
    tf.feature_column.numeric_column(
        key='input-features', shape=(2,))
]

features


## Step 3: Create the estimator: convert from a Keras model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(2,), name='input-features'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

model.compile(optimizer=tf.keras.optimizers.SGD(),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()])

my_estimator = tf.keras.estimator.model_to_estimator(
    keras_model=model,
    model_dir='models/estimator-for-XOR/')


## Step 4: use the estimator: train/evaluate/predict

num_epochs = 200
batch_size = 2
steps_per_epoch = int(np.ceil(len(x_train) / batch_size))

my_estimator.train(
    input_fn=lambda: train_input_fn(x_train, y_train, batch_size),
    steps=num_epochs * steps_per_epoch)


my_estimator.evaluate(
    input_fn=lambda: eval_input_fn(x_valid, y_valid, batch_size))
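

# The predict() part of Step 4 is not shown above; a minimal sketch (an
# addition -- the exact keys of the returned dicts depend on the names Keras
# assigns to the output layer):
pred = my_estimator.predict(
    input_fn=lambda: eval_input_fn(x_valid, batch_size=8))
print(next(iter(pred)))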


# ...

# # Summary

# ...

# ---
#
# Readers may ignore the next cell.