Skip to content

Commit b06d52c

Browse files

File tree

13 files changed

+391
-488
lines changed

13 files changed

+391
-488
lines changed

Diff for: code/ch09/ch09.ipynb

+149-135
Large diffs are not rendered by default.

Diff for: code/ch09/movieclassifier/pkl_objects/classifier.pkl

32 Bytes
Binary file not shown.

Diff for: code/ch09/movieclassifier/pkl_objects/stopwords.pkl

245 Bytes
Binary file not shown.

Diff for: code/ch09/movieclassifier/reviews.sqlite

0 Bytes
Binary file not shown.
74 Bytes
Binary file not shown.
418 Bytes
Binary file not shown.

Diff for: code/ch09/movieclassifier_with_update/reviews.sqlite

6 KB
Binary file not shown.

Diff for: code/ch09/reviews.sqlite

0 Bytes
Binary file not shown.

Diff for: code/ch10/ch10.ipynb

+64-98
Large diffs are not rendered by default.

Diff for: code/ch11/ch11.ipynb

+73-79
Large diffs are not rendered by default.

Diff for: code/ch11/ch11.py

-5
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,3 @@
497497

498498

499499

500-
501-
502-
503-
504-

Diff for: code/ch12/ch12.ipynb

+63-149
Large diffs are not rendered by default.

Diff for: code/ch12/ch12.py

+42-22
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# coding: utf-8
22

33

4-
import os
5-
import struct
6-
import numpy as np
74
import sys
85
import gzip
96
import shutil
7+
import os
8+
import struct
9+
import numpy as np
1010
import matplotlib.pyplot as plt
1111

1212
# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
@@ -91,12 +91,48 @@
9191
# - Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 7.8 MB unzipped, 10,000 samples)
9292
# - Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 10 KB unzipped, 10,000 labels)
9393
#
94-
# In this section, we will only be working with a subset of MNIST, thus, we only need to download the training set images and training set labels. After downloading the files, I recommend unzipping the files using the Unix/Linux gzip tool from the terminal for efficiency, e.g., using the command
94+
# In this section, we will only be working with a subset of MNIST, thus, we only need to download the training set images and training set labels.
95+
#
96+
# After downloading the files, simply run the next code cell to unzip the files.
97+
#
98+
#
99+
100+
101+
102+
# this code cell unzips mnist
103+
104+
105+
if (sys.version_info > (3, 0)):
106+
writemode = 'wb'
107+
else:
108+
writemode = 'w'
109+
110+
zipped_mnist = [f for f in os.listdir('./') if f.endswith('ubyte.gz')]
111+
for z in zipped_mnist:
112+
with gzip.GzipFile(z, mode='rb') as decompressed, open(z[:-3], writemode) as outfile:
113+
outfile.write(decompressed.read())
114+
115+
116+
# ----
117+
#
118+
# IGNORE IF THE CODE CELL ABOVE EXECUTED WITHOUT PROBLEMS:
119+
#
120+
# If you have issues with the code cell above, I recommend unzipping the files using the Unix/Linux gzip tool from the terminal for efficiency, e.g., using the command
95121
#
96122
# gzip *ubyte.gz -d
97123
#
98124
# in your local MNIST download directory, or, using your favorite unzipping tool if you are working with a machine running on Microsoft Windows. The images are stored in byte form, and using the following function, we will read them into NumPy arrays that we will use to train our MLP.
99125
#
126+
# Please note that if you are **not** using gzip, you need to make sure that the files are named
127+
#
128+
# - train-images-idx3-ubyte
129+
# - train-labels-idx1-ubyte
130+
# - t10k-images-idx3-ubyte
131+
# - t10k-labels-idx1-ubyte
132+
#
133+
# If a file is named, e.g., `train-images.idx3-ubyte` after unzipping (this is due to the fact that certain tools try to guess a file suffix), please rename it to `train-images-idx3-ubyte` before proceeding.
134+
#
135+
# ----
100136

101137

102138

@@ -130,22 +166,6 @@ def load_mnist(path, kind='train'):
130166

131167

132168

133-
# unzips mnist
134-
135-
136-
if (sys.version_info > (3, 0)):
137-
writemode = 'wb'
138-
else:
139-
writemode = 'w'
140-
141-
zipped_mnist = [f for f in os.listdir('./') if f.endswith('ubyte.gz')]
142-
for z in zipped_mnist:
143-
with gzip.GzipFile(z, mode='rb') as decompressed, open(z[:-3], writemode) as outfile:
144-
outfile.write(decompressed.read())
145-
146-
147-
148-
149169
X_train, y_train = load_mnist('', kind='train')
150170
print('Rows: %d, columns: %d' % (X_train.shape[0], X_train.shape[1]))
151171

@@ -331,7 +351,7 @@ def _compute_cost(self, y_enc, output):
331351
term1 = -y_enc * (np.log(output))
332352
term2 = (1. - y_enc) * np.log(1. - output)
333353
cost = np.sum(term1 - term2) + L2_term
334-
354+
335355
# If you are applying this cost function to other
336356
# datasets where activation
337357
# values may become more extreme (closer to zero or 1)
@@ -345,7 +365,7 @@ def _compute_cost(self, y_enc, output):
345365
#
346366
# term1 = -y_enc * (np.log(output + 1e-5))
347367
# term2 = (1. - y_enc) * np.log(1. - output + 1e-5)
348-
368+
349369
return cost
350370

351371
def predict(self, X):

0 commit comments

Comments
 (0)