upd ch08

mario-s · Dec 5, 2019 · e250ac5 · e250ac5
1 parent 93854b6
commit e250ac5
Showing 1 changed file with 116 additions and 59 deletions.
diff --git a/ch08/ch08.ipynb b/ch08/ch08.ipynb
@@ -42,14 +42,14 @@
      "output_type": "stream",
      "text": [
       "Sebastian Raschka \n",
-      "last updated: 2019-11-17 \n",
+      "last updated: 2019-12-05 \n",
       "\n",
       "CPython 3.7.1\n",
-      "IPython 7.9.0\n",
+      "IPython 7.10.1\n",
       "\n",
-      "numpy 1.17.2\n",
-      "pandas 0.24.2\n",
-      "sklearn 0.21.3\n",
+      "numpy 1.17.4\n",
+      "pandas 0.25.3\n",
+      "sklearn 0.22\n",
       "nltk 3.4.5\n"
      ]
     }
@@ -161,7 +161,53 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "100% | 80 MB | 0.74 MB/s | 108 sec elapsed"
+      "32% | 25 MB | 6.34 MB/s | 4 sec elapsed"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "IOPub message rate exceeded.\n",
+      "The notebook server will temporarily stop sending output\n",
+      "to the client in order to avoid crashing it.\n",
+      "To change this limit, set the config variable\n",
+      "`--NotebookApp.iopub_msg_rate_limit`.\n",
+      "\n",
+      "Current values:\n",
+      "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
+      "NotebookApp.rate_limit_window=3.0 (secs)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "72% | 58 MB | 7.86 MB/s | 7 sec elapsed"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "IOPub message rate exceeded.\n",
+      "The notebook server will temporarily stop sending output\n",
+      "to the client in order to avoid crashing it.\n",
+      "To change this limit, set the config variable\n",
+      "`--NotebookApp.iopub_msg_rate_limit`.\n",
+      "\n",
+      "Current values:\n",
+      "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
+      "NotebookApp.rate_limit_window=3.0 (secs)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100% | 80 MB | 8.24 MB/s | 9 sec elapsed"
      ]
     }
    ],
@@ -225,7 +271,7 @@
      "output_type": "stream",
      "text": [
       "0% [##############################] 100% | ETA: 00:00:00\n",
-      "Total time elapsed: 00:02:06\n"
+      "Total time elapsed: 00:01:51\n"
      ]
     }
    ],
@@ -820,7 +866,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -838,7 +884,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -847,7 +893,7 @@
        "['runners', 'like', 'running', 'and', 'thus', 'they', 'run']"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -858,7 +904,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -867,7 +913,7 @@
        "['runner', 'like', 'run', 'and', 'thu', 'they', 'run']"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -878,7 +924,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -887,7 +933,7 @@
      "text": [
       "[nltk_data] Downloading package stopwords to\n",
       "[nltk_data]     /Users/sebastian/nltk_data...\n",
-      "[nltk_data]   Unzipping corpora/stopwords.zip.\n"
+      "[nltk_data]   Package stopwords is already up-to-date!\n"
      ]
     },
     {
@@ -896,7 +942,7 @@
        "True"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -909,7 +955,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -918,7 +964,7 @@
        "['runner', 'like', 'run', 'run', 'lot']"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -955,7 +1001,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -967,7 +1013,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1038,7 +1084,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -1053,15 +1099,17 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  4.0min\n",
-      "[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed: 22.6min\n",
-      "[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed: 39.3min finished\n"
+      "/Users/sebastian/miniconda3/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
+      "  \"timeout or by a memory leak.\", UserWarning\n",
+      "[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  4.1min\n",
+      "[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed: 24.2min\n",
+      "[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed: 41.7min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
+       "GridSearchCV(cv=5, error_score=nan,\n",
        "             estimator=Pipeline(memory=None,\n",
        "                                steps=[('vect',\n",
        "                                        TfidfVectorizer(analyzer='word',\n",
@@ -1078,7 +1126,8 @@
        "                                                        norm='l2',\n",
        "                                                        preprocessor=None,\n",
        "                                                        smooth_idf=True,\n",
-       "                                                        stop_word...\n",
+       "                                                        stop_words=None,\n",
+       "                                                        strip_acc...\n",
        "                                                \"you'll\", \"you'd\", 'your',\n",
        "                                                'yours', 'yourself',\n",
        "                                                'yourselves', 'he', 'him',\n",
@@ -1087,14 +1136,14 @@
        "                                                'herself', 'it', \"it's\", 'its',\n",
        "                                                'itself', ...],\n",
        "                                               None],\n",
-       "                          'vect__tokenizer': [<function tokenizer at 0x7fcc81087d90>,\n",
-       "                                              <function tokenizer_porter at 0x7fcc81087c80>],\n",
+       "                          'vect__tokenizer': [<function tokenizer at 0x7fbc89ae1ea0>,\n",
+       "                                              <function tokenizer_porter at 0x7fbc89ae1f28>],\n",
        "                          'vect__use_idf': [False]}],\n",
        "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
        "             scoring='accuracy', verbose=2)"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1105,14 +1154,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Best parameter set: {'clf__C': 10.0, 'clf__penalty': 'l2', 'vect__ngram_range': (1, 1), 'vect__stop_words': None, 'vect__tokenizer': <function tokenizer at 0x7fcc81087d90>} \n",
+      "Best parameter set: {'clf__C': 10.0, 'clf__penalty': 'l2', 'vect__ngram_range': (1, 1), 'vect__stop_words': None, 'vect__tokenizer': <function tokenizer at 0x7fbc89ae1ea0>} \n",
       "CV Accuracy: 0.897\n"
      ]
     }
@@ -1124,7 +1173,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -1159,16 +1208,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/sebastian/miniconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py:296: FutureWarning: Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.\n",
+      "  FutureWarning\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
        "array([0.4, 0.2, 0.6, 0.2, 0.4])"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1202,7 +1259,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
@@ -1256,16 +1313,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "0.36"
+       "0.36000000000000004"
       ]
      },
-     "execution_count": 42,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1283,7 +1340,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -1292,7 +1349,7 @@
        "0.36000000000000004"
       ]
      },
-     "execution_count": 43,
+     "execution_count": 35,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1329,7 +1386,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1358,7 +1415,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1393,7 +1450,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
@@ -1403,7 +1460,7 @@
        " 1)"
       ]
      },
-     "execution_count": 46,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1414,7 +1471,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1432,7 +1489,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1448,7 +1505,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1463,15 +1520,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
       "0% [##############################] 100% | ETA: 00:00:00\n",
-      "Total time elapsed: 00:00:18\n"
+      "Total time elapsed: 00:00:20\n"
      ]
     }
    ],
@@ -1491,7 +1548,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -1510,7 +1567,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1540,7 +1597,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [
     {
@@ -1595,7 +1652,7 @@
        "2  ***SPOILER*** Do not read this, if you think a...          0"
       ]
      },
-     "execution_count": 53,
+     "execution_count": 45,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1609,7 +1666,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1623,7 +1680,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1637,7 +1694,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [
     {
@@ -1646,7 +1703,7 @@
        "(10, 5000)"
       ]
      },
-     "execution_count": 56,
+     "execution_count": 48,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1657,7 +1714,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [
     {
@@ -1725,7 +1782,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 50,
    "metadata": {
     "scrolled": true
    },
@@ -1794,7 +1851,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [
     {