some minor comments added

zhongkailv · Jul 5, 2018 · ef6c60f · ef6c60f
1 parent 36219d8
commit ef6c60f
Show file tree

Hide file tree

Showing 4 changed files with 113 additions and 62 deletions.
diff --git a/code/ch08/ch08.ipynb b/code/ch08/ch08.ipynb
@@ -42,15 +42,15 @@
      "output_type": "stream",
      "text": [
       "Sebastian Raschka \n",
-      "last updated: 2017-09-02 \n",
+      "last updated: 2018-07-02 \n",
       "\n",
-      "CPython 3.6.1\n",
-      "IPython 6.1.0\n",
+      "CPython 3.6.5\n",
+      "IPython 6.4.0\n",
       "\n",
-      "numpy 1.12.1\n",
-      "pandas 0.20.3\n",
-      "sklearn 0.19.0\n",
-      "nltk 3.2.4\n"
+      "numpy 1.14.5\n",
+      "pandas 0.23.1\n",
+      "sklearn 0.19.1\n",
+      "nltk 3.3\n"
      ]
     }
    ],
@@ -155,7 +155,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "100% | 80 MB | 1.03 MB/s | 77 sec elapsed"
+      "100% | 80 MB | 1.49 MB/s | 53 sec elapsed"
      ]
     }
    ],
@@ -198,9 +198,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "if not os.path.isdir('aclImdb'):\n",
@@ -218,15 +216,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
       "0% [##############################] 100% | ETA: 00:00:00\n",
-      "Total time elapsed: 00:02:21\n"
+      "Total time elapsed: 00:04:19\n"
      ]
     }
    ],
@@ -246,7 +244,7 @@
     "for s in ('test', 'train'):\n",
     "    for l in ('pos', 'neg'):\n",
     "        path = os.path.join(basepath, s, l)\n",
-    "        for file in os.listdir(path):\n",
+    "        for file in sorted(os.listdir(path)):\n",
     "            with open(os.path.join(path, file), \n",
     "                      'r', encoding='utf-8') as infile:\n",
     "                txt = infile.read()\n",
@@ -265,10 +263,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 5,
+   "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
@@ -286,36 +282,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 6,
+   "metadata": {},
    "outputs": [],
    "source": [
     "df.to_csv('movie_data.csv', index=False, encoding='utf-8')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -352,7 +346,7 @@
        "2  ***SPOILER*** Do not read this, if you think a...          0"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -364,14 +358,34 @@
     "df.head(3)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(50000, 2)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.shape"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "<hr>\n",
     "### Note\n",
     "\n",
-    "If you have problems with creating the `movie_data.csv` file in the previous chapter, you can find a download a zip archive at \n",
+    "If you have problems creating the `movie_data.csv` file, you can download a zip archive at \n",
     "https://github.com/rasbt/python-machine-learning-book-2nd-edition/tree/master/code/ch08/\n",
     "<hr>"
    ]
@@ -1855,6 +1869,18 @@
    "language": "python",
    "name": "python3"
   },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  },
   "toc": {
    "nav_menu": {},
    "number_sections": true,

diff --git a/code/ch10/ch10.ipynb b/code/ch10/ch10.ipynb
@@ -1740,7 +1740,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.1"
+   "version": "3.6.5"
   }
  },
  "nbformat": 4,

diff --git a/code/ch12/ch12.ipynb b/code/ch12/ch12.ipynb
@@ -304,11 +304,61 @@
     "- Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 7.8 MB, 10,000 samples)\n",
     "- Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 10 KB unzipped, 10,000 labels)\n",
     "\n",
-    "In this section, we will only be working with a subset of MNIST, thus, we only need to download the training set images and training set labels. After downloading the files, I recommend unzipping the files using the Unix/Linux gzip tool from the terminal for efficiency, e.g., using the command \n",
+    "In this section, we will only be working with a subset of MNIST, thus, we only need to download the training set images and training set labels. \n",
+    "\n",
+    "After downloading the files, simply run the next code cell to unzip the files.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# unzips mnist\n",
+    "\n",
+    "import sys\n",
+    "import gzip\n",
+    "import shutil\n",
+    "\n",
+    "if (sys.version_info > (3, 0)):\n",
+    "    writemode = 'wb'\n",
+    "else:\n",
+    "    writemode = 'w'\n",
+    "\n",
+    "zipped_mnist = [f for f in os.listdir('./') if f.endswith('ubyte.gz')]\n",
+    "for z in zipped_mnist:\n",
+    "    with gzip.GzipFile(z, mode='rb') as decompressed, open(z[:-3], writemode) as outfile:\n",
+    "        outfile.write(decompressed.read()) "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "----\n",
+    "\n",
+    "IGNORE IF THE CODE CELL ABOVE EXECUTED WITHOUT PROBLEMS:\n",
+    "    \n",
+    "If you have issues with the code cell above, I recommend unzipping the files using the Unix/Linux gzip tool from the terminal for efficiency, e.g., using the command \n",
     "\n",
     "    gzip *ubyte.gz -d\n",
     " \n",
-    "in your local MNIST download directory, or, using your favorite unzipping tool if you are working with a machine running on Microsoft Windows. The images are stored in byte form, and using the following function, we will read them into NumPy arrays that we will use to train our MLP.\n"
+    "in your local MNIST download directory, or, using your favorite unzipping tool if you are working with a machine running on Microsoft Windows. The images are stored in byte form, and using the following function, we will read them into NumPy arrays that we will use to train our MLP.\n",
+    "\n",
+    "Please note: if you are **not** using gzip, make sure that the files are named\n",
+    "\n",
+    "- train-images-idx3-ubyte\n",
+    "- train-labels-idx1-ubyte\n",
+    "- t10k-images-idx3-ubyte\n",
+    "- t10k-labels-idx1-ubyte\n",
+    "\n",
+    "If a file is named, e.g., `train-images.idx3-ubyte` after unzipping (this happens because certain tools try to guess a file suffix), please rename it to `train-images-idx3-ubyte` before proceeding. \n",
+    "\n",
+    "----"
    ]
   },
   {
@@ -368,31 +418,6 @@
     "!ls"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# unzips mnist\n",
-    "\n",
-    "import sys\n",
-    "import gzip\n",
-    "import shutil\n",
-    "\n",
-    "if (sys.version_info > (3, 0)):\n",
-    "    writemode = 'wb'\n",
-    "else:\n",
-    "    writemode = 'w'\n",
-    "\n",
-    "zipped_mnist = [f for f in os.listdir('./') if f.endswith('ubyte.gz')]\n",
-    "for z in zipped_mnist:\n",
-    "    with gzip.GzipFile(z, mode='rb') as decompressed, open(z[:-3], writemode) as outfile:\n",
-    "        outfile.write(decompressed.read()) "
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 10,
@@ -1365,7 +1390,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.1"
+   "version": "3.6.5"
   }
  },
  "nbformat": 4,

diff --git a/code/ch13/ch13.ipynb b/code/ch13/ch13.ipynb
@@ -1434,7 +1434,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.1"
+   "version": "3.6.5"
   }
  },
  "nbformat": 4,