Commit: ch04
rasbt committed Jun 16, 2019
1 parent f89a155 commit 1691604
Showing 12 changed files with 127 additions and 180 deletions.
81 changes: 30 additions & 51 deletions ch03/ch03.ipynb

Large diffs are not rendered by default.

Binary file modified ch03/images/03_06.png
120 changes: 55 additions & 65 deletions ch04/ch04.ipynb

Large diffs are not rendered by default.

42 changes: 22 additions & 20 deletions ch04/ch04.py
@@ -4,10 +4,11 @@
import pandas as pd
from io import StringIO
import sys
from sklearn.preprocessing import Imputer
from sklearn.impute import SimpleImputer
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
@@ -20,9 +21,9 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-2nd-edition
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-2nd-edition/blob/master/LICENSE.txt)

@@ -43,7 +44,7 @@

# - [Dealing with missing data](#Dealing-with-missing-data)
# - [Identifying missing values in tabular data](#Identifying-missing-values-in-tabular-data)
# - [Eliminating samples or features with missing values](#Eliminating-samples-or-features-with-missing-values)
# - [Eliminating training examples or features with missing values](#Eliminating-training-examples-or-features-with-missing-values)
# - [Imputing missing values](#Imputing-missing-values)
# - [Understanding the scikit-learn estimator API](#Understanding-the-scikit-learn-estimator-API)
# - [Handling categorical data](#Handling-categorical-data)
@@ -101,7 +102,7 @@



# ## Eliminating samples or features with missing values
# ## Eliminating training examples or features with missing values
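
# Elimination of rows or columns with missing values is typically done with
# pandas' dropna(); a minimal illustrative sketch (not part of this commit's
# diff), assuming the small example DataFrame `df` with NaN entries used
# earlier in this chapter:

df.dropna(axis=0)          # drop rows that contain at least one NaN
df.dropna(axis=1)          # drop columns that contain at least one NaN
df.dropna(how='all')       # drop only rows where all values are NaN
df.dropna(thresh=4)        # keep rows with at least 4 non-NaN values
df.dropna(subset=['C'])    # drop rows with NaN in column 'C' (column name assumed)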



@@ -159,7 +160,7 @@
# impute missing values via the column mean


imr = Imputer(missing_values='NaN', strategy='mean', axis=0)
imr = SimpleImputer(missing_values=np.nan, strategy='mean')
imr = imr.fit(df.values)
imputed_data = imr.transform(df.values)
imputed_data
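
# For comparison, the same column-mean imputation can be expressed directly in
# pandas (illustrative sketch, not part of this commit's diff; assumes the
# same `df` with NaN entries as above):

df.fillna(df.mean())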
@@ -253,15 +254,12 @@
class_le.inverse_transform(y)


# Note: The deprecation warning shown above is due to an implementation detail in scikit-learn. It was already addressed in a pull request (https://github.com/scikit-learn/scikit-learn/pull/9816), and the patch will be released with the next version of scikit-learn (i.e., v. 0.20.0).


# ## Performing one-hot encoding on nominal features



X = df[['color', 'size', 'price']].values

color_le = LabelEncoder()
X[:, 0] = color_le.fit_transform(X[:, 0])
X
@@ -270,17 +268,18 @@



ohe = OneHotEncoder(categorical_features=[0])
ohe.fit_transform(X).toarray()
X = df[['color', 'size', 'price']].values
color_ohe = OneHotEncoder()
color_ohe.fit_transform(X[:, 0].reshape(-1, 1)).toarray()




# return dense array so that we can skip
# the toarray step

ohe = OneHotEncoder(categorical_features=[0], sparse=False)
ohe.fit_transform(X)
X = df[['color', 'size', 'price']].values
c_transf = ColumnTransformer([ ('onehot', OneHotEncoder(), [0]),
('nothing', 'passthrough', [1, 2])])
c_transf.fit_transform(X).astype(float)
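
# Illustrative sketch (not part of this commit): when a pandas DataFrame is
# passed instead of a NumPy array, ColumnTransformer can also select columns
# by name; assumes the same `df` with a string-valued 'color' column and
# already-numeric 'size' and 'price' columns.

c_transf_named = ColumnTransformer([('onehot', OneHotEncoder(), ['color']),
                                    ('nothing', 'passthrough', ['size', 'price'])])
c_transf_named.fit_transform(df[['color', 'size', 'price']]).astype(float)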



@@ -301,8 +300,10 @@

# multicollinearity guard for the OneHotEncoder

ohe = OneHotEncoder(categorical_features=[0])
ohe.fit_transform(X).toarray()[:, 1:]
color_ohe = OneHotEncoder(categories='auto', drop='first')
c_transf = ColumnTransformer([ ('onehot', color_ohe, [0]),
('nothing', 'passthrough', [1, 2])])
c_transf.fit_transform(X).astype(float)
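
# Illustrative sketch (not part of this commit): the equivalent pandas
# one-liner, which likewise drops one redundant dummy column per encoded
# feature; assumes the same `df` with columns 'price', 'color', and 'size'.

pd.get_dummies(df[['price', 'color', 'size']], drop_first=True)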



@@ -404,15 +405,15 @@



LogisticRegression(penalty='l1')
LogisticRegression(penalty='l1', solver='liblinear', multi_class='ovr')


# Applied to the standardized Wine data ...




lr = LogisticRegression(penalty='l1', C=1.0)
lr = LogisticRegression(penalty='l1', C=1.0, solver='liblinear', multi_class='ovr')
# Note that C=1.0 is the default. You can increase
# or decrease it to make the regularization effect
# stronger or weaker, respectively.
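
# Illustrative sketch (not part of this commit): fitting the L1-regularized
# model above and checking its accuracy and weight sparsity; assumes the
# standardized Wine arrays X_train_std, y_train, X_test_std, and y_test
# created earlier in the chapter (the X_test_std/y_test names are assumed).

lr.fit(X_train_std, y_train)
print('Training accuracy:', lr.score(X_train_std, y_train))
print('Test accuracy:', lr.score(X_test_std, y_test))
print('Non-zero weights per class:', (lr.coef_ != 0).sum(axis=1))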
@@ -454,7 +455,8 @@

weights, params = [], []
for c in np.arange(-4., 6.):
lr = LogisticRegression(penalty='l1', C=10.**c, random_state=0)
lr = LogisticRegression(penalty='l1', C=10.**c, solver='liblinear',
multi_class='ovr', random_state=0)
lr.fit(X_train_std, y_train)
weights.append(lr.coef_[1])
params.append(10**c)
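
# Illustrative sketch (not part of this commit): plotting the regularization
# path collected above -- one weight trajectory per feature over the range of
# C values; assumes matplotlib.pyplot was imported as plt earlier in the file.

weights = np.array(weights)
for column in range(weights.shape[1]):
    plt.plot(params, weights[:, column])
plt.axhline(0, color='black', linestyle='--', linewidth=3)
plt.xscale('log')
plt.xlabel('C')
plt.ylabel('weight coefficient')
plt.show()
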
2 changes: 1 addition & 1 deletion ch05/ch05.ipynb
@@ -2161,7 +2161,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
},
"toc": {
"nav_menu": {},
51 changes: 13 additions & 38 deletions ch06/ch06.ipynb

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions ch06/ch06.py
@@ -204,7 +204,8 @@


pipe_lr = make_pipeline(StandardScaler(),
LogisticRegression(penalty='l2', random_state=1, solver='lbfgs'))
LogisticRegression(penalty='l2', random_state=1,
solver='lbfgs', max_iter=10000))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipe_lr,
X=X_train,
@@ -243,7 +244,7 @@
plt.legend(loc='lower right')
plt.ylim([0.8, 1.03])
plt.tight_layout()
plt.savefig('images/06_05.png', dpi=300)
#plt.savefig('images/06_05.png', dpi=300)
plt.show()
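
# Illustrative sketch (not part of this commit): max_iter=10000 presumably
# guards against lbfgs stopping at its default iteration cap (100) with a
# ConvergenceWarning on this data; after fitting, the number of iterations
# actually used can be read off the pipeline's final step. Assumes the
# X_train/y_train split from earlier in the chapter (y_train name assumed).

pipe_lr.fit(X_train, y_train)
print(pipe_lr.named_steps['logisticregression'].n_iter_)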


Binary file modified ch06/images/06_05.png
Binary file modified ch06/images/06_06.png
2 changes: 1 addition & 1 deletion ch09/ch09.ipynb
@@ -1241,7 +1241,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion ch10/ch10.ipynb
@@ -1714,7 +1714,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion ch11/ch11.ipynb
@@ -1446,7 +1446,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
