Skip to content

Commit

Permalink
some minor comments added
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Jul 5, 2018
1 parent 36219d8 commit ef6c60f
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 62 deletions.
90 changes: 58 additions & 32 deletions code/ch08/ch08.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@
"output_type": "stream",
"text": [
"Sebastian Raschka \n",
"last updated: 2017-09-02 \n",
"last updated: 2018-07-02 \n",
"\n",
"CPython 3.6.1\n",
"IPython 6.1.0\n",
"CPython 3.6.5\n",
"IPython 6.4.0\n",
"\n",
"numpy 1.12.1\n",
"pandas 0.20.3\n",
"sklearn 0.19.0\n",
"nltk 3.2.4\n"
"numpy 1.14.5\n",
"pandas 0.23.1\n",
"sklearn 0.19.1\n",
"nltk 3.3\n"
]
}
],
Expand Down Expand Up @@ -155,7 +155,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"100% | 80 MB | 1.03 MB/s | 77 sec elapsed"
"100% | 80 MB | 1.49 MB/s | 53 sec elapsed"
]
}
],
Expand Down Expand Up @@ -198,9 +198,7 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"if not os.path.isdir('aclImdb'):\n",
Expand All @@ -218,15 +216,15 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"0% [##############################] 100% | ETA: 00:00:00\n",
"Total time elapsed: 00:02:21\n"
"Total time elapsed: 00:04:19\n"
]
}
],
Expand All @@ -246,7 +244,7 @@
"for s in ('test', 'train'):\n",
" for l in ('pos', 'neg'):\n",
" path = os.path.join(basepath, s, l)\n",
" for file in os.listdir(path):\n",
" for file in sorted(os.listdir(path)):\n",
" with open(os.path.join(path, file), \n",
" 'r', encoding='utf-8') as infile:\n",
" txt = infile.read()\n",
Expand All @@ -265,10 +263,8 @@
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
Expand All @@ -286,36 +282,34 @@
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('movie_data.csv', index=False, encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
Expand Down Expand Up @@ -352,7 +346,7 @@
"2 ***SPOILER*** Do not read this, if you think a... 0"
]
},
"execution_count": 26,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -364,14 +358,34 @@
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(50000, 2)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<hr>\n",
"### Note\n",
"\n",
"If you have problems with creating the `movie_data.csv` file in the previous chapter, you can find a download a zip archive at \n",
"If you have problems creating the `movie_data.csv` file, you can download a zip archive of it at \n",
"https://github.com/rasbt/python-machine-learning-book-2nd-edition/tree/master/code/ch08/\n",
"<hr>"
]
Expand Down Expand Up @@ -1855,6 +1869,18 @@
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"toc": {
"nav_menu": {},
"number_sections": true,
Expand Down
2 changes: 1 addition & 1 deletion code/ch10/ch10.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1740,7 +1740,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
"version": "3.6.5"
}
},
"nbformat": 4,
Expand Down
81 changes: 53 additions & 28 deletions code/ch12/ch12.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -304,11 +304,61 @@
"- Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 7.8 MB, 10,000 samples)\n",
"- Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 10 KB unzipped, 10,000 labels)\n",
"\n",
"In this section, we will only be working with a subset of MNIST, thus, we only need to download the training set images and training set labels. After downloading the files, I recommend unzipping the files using the Unix/Linux gzip tool from the terminal for efficiency, e.g., using the command \n",
"In this section, we will only be working with a subset of MNIST, thus, we only need to download the training set images and training set labels. \n",
"\n",
"After downloading the files, simply run the next code cell to unzip the files.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# unzips mnist\n",
"\n",
"import sys\n",
"import gzip\n",
"import shutil\n",
"\n",
"if (sys.version_info > (3, 0)):\n",
" writemode = 'wb'\n",
"else:\n",
" writemode = 'w'\n",
"\n",
"zipped_mnist = [f for f in os.listdir('./') if f.endswith('ubyte.gz')]\n",
"for z in zipped_mnist:\n",
" with gzip.GzipFile(z, mode='rb') as decompressed, open(z[:-3], writemode) as outfile:\n",
" outfile.write(decompressed.read()) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"----\n",
"\n",
"IGNORE IF THE CODE CELL ABOVE EXECUTED WITHOUT PROBLEMS:\n",
" \n",
"If you have issues with the code cell above, I recommend unzipping the files using the Unix/Linux gzip tool from the terminal for efficiency, e.g., using the command \n",
"\n",
" gzip *ubyte.gz -d\n",
" \n",
"in your local MNIST download directory, or, using your favorite unzipping tool if you are working with a machine running on Microsoft Windows. The images are stored in byte form, and using the following function, we will read them into NumPy arrays that we will use to train our MLP.\n"
"in your local MNIST download directory, or use your favorite unzipping tool if you are working on a machine running Microsoft Windows. The images are stored in byte form, and using the following function, we will read them into NumPy arrays that we will use to train our MLP.\n",
"\n",
"Please note: if you are **not** using gzip, make sure that the files are named\n",
"\n",
"- train-images-idx3-ubyte\n",
"- train-labels-idx1-ubyte\n",
"- t10k-images-idx3-ubyte\n",
"- t10k-labels-idx1-ubyte\n",
"\n",
"If a file is named, e.g., `train-images.idx3-ubyte` after unzipping (certain tools try to guess a file suffix), please rename it to `train-images-idx3-ubyte` before proceeding. \n",
"\n",
"----"
]
},
{
Expand Down Expand Up @@ -368,31 +418,6 @@
"!ls"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# unzips mnist\n",
"\n",
"import sys\n",
"import gzip\n",
"import shutil\n",
"\n",
"if (sys.version_info > (3, 0)):\n",
" writemode = 'wb'\n",
"else:\n",
" writemode = 'w'\n",
"\n",
"zipped_mnist = [f for f in os.listdir('./') if f.endswith('ubyte.gz')]\n",
"for z in zipped_mnist:\n",
" with gzip.GzipFile(z, mode='rb') as decompressed, open(z[:-3], writemode) as outfile:\n",
" outfile.write(decompressed.read()) "
]
},
{
"cell_type": "code",
"execution_count": 10,
Expand Down Expand Up @@ -1365,7 +1390,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
"version": "3.6.5"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion code/ch13/ch13.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1434,7 +1434,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
"version": "3.6.5"
}
},
"nbformat": 4,
Expand Down

0 comments on commit ef6c60f

Please sign in to comment.