Skip to content

Commit

Permalink
upd ch08
Browse files Browse the repository at this point in the history
rasbt committed Dec 5, 2019
1 parent 93854b6 commit e250ac5
Showing 1 changed file with 116 additions and 59 deletions.
175 changes: 116 additions & 59 deletions ch08/ch08.ipynb
Original file line number Diff line number Diff line change
@@ -42,14 +42,14 @@
"output_type": "stream",
"text": [
"Sebastian Raschka \n",
"last updated: 2019-11-17 \n",
"last updated: 2019-12-05 \n",
"\n",
"CPython 3.7.1\n",
"IPython 7.9.0\n",
"IPython 7.10.1\n",
"\n",
"numpy 1.17.2\n",
"pandas 0.24.2\n",
"sklearn 0.21.3\n",
"numpy 1.17.4\n",
"pandas 0.25.3\n",
"sklearn 0.22\n",
"nltk 3.4.5\n"
]
}
@@ -161,7 +161,53 @@
"name": "stdout",
"output_type": "stream",
"text": [
"100% | 80 MB | 0.74 MB/s | 108 sec elapsed"
"32% | 25 MB | 6.34 MB/s | 4 sec elapsed"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"IOPub message rate exceeded.\n",
"The notebook server will temporarily stop sending output\n",
"to the client in order to avoid crashing it.\n",
"To change this limit, set the config variable\n",
"`--NotebookApp.iopub_msg_rate_limit`.\n",
"\n",
"Current values:\n",
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
"NotebookApp.rate_limit_window=3.0 (secs)\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"72% | 58 MB | 7.86 MB/s | 7 sec elapsed"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"IOPub message rate exceeded.\n",
"The notebook server will temporarily stop sending output\n",
"to the client in order to avoid crashing it.\n",
"To change this limit, set the config variable\n",
"`--NotebookApp.iopub_msg_rate_limit`.\n",
"\n",
"Current values:\n",
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
"NotebookApp.rate_limit_window=3.0 (secs)\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100% | 80 MB | 8.24 MB/s | 9 sec elapsed"
]
}
],
@@ -225,7 +271,7 @@
"output_type": "stream",
"text": [
"0% [##############################] 100% | ETA: 00:00:00\n",
"Total time elapsed: 00:02:06\n"
"Total time elapsed: 00:01:51\n"
]
}
],
@@ -820,7 +866,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -838,7 +884,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -847,7 +893,7 @@
"['runners', 'like', 'running', 'and', 'thus', 'they', 'run']"
]
},
"execution_count": 25,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -858,7 +904,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -867,7 +913,7 @@
"['runner', 'like', 'run', 'and', 'thu', 'they', 'run']"
]
},
"execution_count": 26,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -878,7 +924,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -887,7 +933,7 @@
"text": [
"[nltk_data] Downloading package stopwords to\n",
"[nltk_data] /Users/sebastian/nltk_data...\n",
"[nltk_data] Unzipping corpora/stopwords.zip.\n"
"[nltk_data] Package stopwords is already up-to-date!\n"
]
},
{
@@ -896,7 +942,7 @@
"True"
]
},
"execution_count": 27,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -909,7 +955,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -918,7 +964,7 @@
"['runner', 'like', 'run', 'run', 'lot']"
]
},
"execution_count": 28,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -955,7 +1001,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@@ -967,7 +1013,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@@ -1038,7 +1084,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -1053,15 +1099,17 @@
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.\n",
"[Parallel(n_jobs=-1)]: Done 17 tasks | elapsed: 4.0min\n",
"[Parallel(n_jobs=-1)]: Done 138 tasks | elapsed: 22.6min\n",
"[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed: 39.3min finished\n"
"/Users/sebastian/miniconda3/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
" \"timeout or by a memory leak.\", UserWarning\n",
"[Parallel(n_jobs=-1)]: Done 17 tasks | elapsed: 4.1min\n",
"[Parallel(n_jobs=-1)]: Done 138 tasks | elapsed: 24.2min\n",
"[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed: 41.7min finished\n"
]
},
{
"data": {
"text/plain": [
"GridSearchCV(cv=5, error_score='raise-deprecating',\n",
"GridSearchCV(cv=5, error_score=nan,\n",
" estimator=Pipeline(memory=None,\n",
" steps=[('vect',\n",
" TfidfVectorizer(analyzer='word',\n",
@@ -1078,7 +1126,8 @@
" norm='l2',\n",
" preprocessor=None,\n",
" smooth_idf=True,\n",
" stop_word...\n",
" stop_words=None,\n",
" strip_acc...\n",
" \"you'll\", \"you'd\", 'your',\n",
" 'yours', 'yourself',\n",
" 'yourselves', 'he', 'him',\n",
@@ -1087,14 +1136,14 @@
" 'herself', 'it', \"it's\", 'its',\n",
" 'itself', ...],\n",
" None],\n",
" 'vect__tokenizer': [<function tokenizer at 0x7fcc81087d90>,\n",
" <function tokenizer_porter at 0x7fcc81087c80>],\n",
" 'vect__tokenizer': [<function tokenizer at 0x7fbc89ae1ea0>,\n",
" <function tokenizer_porter at 0x7fbc89ae1f28>],\n",
" 'vect__use_idf': [False]}],\n",
" pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
" scoring='accuracy', verbose=2)"
]
},
"execution_count": 37,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -1105,14 +1154,14 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameter set: {'clf__C': 10.0, 'clf__penalty': 'l2', 'vect__ngram_range': (1, 1), 'vect__stop_words': None, 'vect__tokenizer': <function tokenizer at 0x7fcc81087d90>} \n",
"Best parameter set: {'clf__C': 10.0, 'clf__penalty': 'l2', 'vect__ngram_range': (1, 1), 'vect__stop_words': None, 'vect__tokenizer': <function tokenizer at 0x7fbc89ae1ea0>} \n",
"CV Accuracy: 0.897\n"
]
}
@@ -1124,7 +1173,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -1159,16 +1208,24 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sebastian/miniconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py:296: FutureWarning: Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.\n",
" FutureWarning\n"
]
},
{
"data": {
"text/plain": [
"array([0.4, 0.2, 0.6, 0.2, 0.4])"
]
},
"execution_count": 40,
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@@ -1202,7 +1259,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 33,
"metadata": {},
"outputs": [
{
@@ -1256,16 +1313,16 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.36"
"0.36000000000000004"
]
},
"execution_count": 42,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
@@ -1283,7 +1340,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -1292,7 +1349,7 @@
"0.36000000000000004"
]
},
"execution_count": 43,
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@@ -1329,7 +1386,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
@@ -1358,7 +1415,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
@@ -1393,7 +1450,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 38,
"metadata": {},
"outputs": [
{
@@ -1403,7 +1460,7 @@
" 1)"
]
},
"execution_count": 46,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -1414,7 +1471,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
@@ -1432,7 +1489,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
@@ -1448,7 +1505,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
@@ -1463,15 +1520,15 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"0% [##############################] 100% | ETA: 00:00:00\n",
"Total time elapsed: 00:00:18\n"
"Total time elapsed: 00:00:20\n"
]
}
],
@@ -1491,7 +1548,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -1510,7 +1567,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
@@ -1540,7 +1597,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 45,
"metadata": {},
"outputs": [
{
@@ -1595,7 +1652,7 @@
"2 ***SPOILER*** Do not read this, if you think a... 0"
]
},
"execution_count": 53,
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
@@ -1609,7 +1666,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
@@ -1623,7 +1680,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
@@ -1637,7 +1694,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 48,
"metadata": {},
"outputs": [
{
@@ -1646,7 +1703,7 @@
"(10, 5000)"
]
},
"execution_count": 56,
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
@@ -1657,7 +1714,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 49,
"metadata": {},
"outputs": [
{
@@ -1725,7 +1782,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 50,
"metadata": {
"scrolled": true
},
@@ -1794,7 +1851,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 51,
"metadata": {},
"outputs": [
{

0 comments on commit e250ac5

Please sign in to comment.