Skip to content

Commit

Permalink
ch04
Browse files Browse the repository at this point in the history
rasbt committed Jun 16, 2019
1 parent f89a155 commit 1691604
Showing 12 changed files with 127 additions and 180 deletions.
81 changes: 30 additions & 51 deletions ch03/ch03.ipynb

Large diffs are not rendered by default.

Binary file modified ch03/images/03_06.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
120 changes: 55 additions & 65 deletions ch04/ch04.ipynb

Large diffs are not rendered by default.

42 changes: 22 additions & 20 deletions ch04/ch04.py
Original file line number Diff line number Diff line change
@@ -4,10 +4,11 @@
import pandas as pd
from io import StringIO
import sys
from sklearn.preprocessing import Imputer
from sklearn.impute import SimpleImputer
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
@@ -20,9 +21,9 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-2nd-edition
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-2nd-edition/blob/master/LICENSE.txt)

@@ -43,7 +44,7 @@

# - [Dealing with missing data](#Dealing-with-missing-data)
# - [Identifying missing values in tabular data](#Identifying-missing-values-in-tabular-data)
# - [Eliminating samples or features with missing values](#Eliminating-samples-or-features-with-missing-values)
# - [Eliminating training examples or features with missing values](#Eliminating-training-examples-or-features-with-missing-values)
# - [Imputing missing values](#Imputing-missing-values)
# - [Understanding the scikit-learn estimator API](#Understanding-the-scikit-learn-estimator-API)
# - [Handling categorical data](#Handling-categorical-data)
@@ -101,7 +102,7 @@



# ## Eliminating samples or features with missing values
# ## Eliminating training examples or features with missing values



@@ -159,7 +160,7 @@
# impute missing values via the column mean


imr = Imputer(missing_values='NaN', strategy='mean', axis=0)
imr = SimpleImputer(missing_values=np.nan, strategy='mean')
imr = imr.fit(df.values)
imputed_data = imr.transform(df.values)
imputed_data
@@ -253,15 +254,12 @@
class_le.inverse_transform(y)


# Note: The deprecation warning shown above is due to an implementation detail in scikit-learn. It was already addressed in a pull request (https://github.com/scikit-learn/scikit-learn/pull/9816), and the patch will be released with the next version of scikit-learn (i.e., v. 0.20.0).


# ## Performing one-hot encoding on nominal features



X = df[['color', 'size', 'price']].values

color_le = LabelEncoder()
X[:, 0] = color_le.fit_transform(X[:, 0])
X
@@ -270,17 +268,18 @@



ohe = OneHotEncoder(categorical_features=[0])
ohe.fit_transform(X).toarray()
X = df[['color', 'size', 'price']].values
color_ohe = OneHotEncoder()
color_ohe.fit_transform(X[:, 0].reshape(-1, 1)).toarray()




# return dense array so that we can skip
# the toarray step

ohe = OneHotEncoder(categorical_features=[0], sparse=False)
ohe.fit_transform(X)
X = df[['color', 'size', 'price']].values
c_transf = ColumnTransformer([ ('onehot', OneHotEncoder(), [0]),
('nothing', 'passthrough', [1, 2])])
c_transf.fit_transform(X).astype(float)



@@ -301,8 +300,10 @@

# multicollinearity guard for the OneHotEncoder

ohe = OneHotEncoder(categorical_features=[0])
ohe.fit_transform(X).toarray()[:, 1:]
color_ohe = OneHotEncoder(categories='auto', drop='first')
c_transf = ColumnTransformer([ ('onehot', color_ohe, [0]),
('nothing', 'passthrough', [1, 2])])
c_transf.fit_transform(X).astype(float)



@@ -404,15 +405,15 @@



LogisticRegression(penalty='l1')
LogisticRegression(penalty='l1', solver='liblinear', multi_class='ovr')


# Applied to the standardized Wine data ...




lr = LogisticRegression(penalty='l1', C=1.0)
lr = LogisticRegression(penalty='l1', C=1.0, solver='liblinear', multi_class='ovr')
# Note that C=1.0 is the default. You can increase
# or decrease it to make the regularization effect
# weaker or stronger, respectively.
@@ -454,7 +455,8 @@

weights, params = [], []
for c in np.arange(-4., 6.):
lr = LogisticRegression(penalty='l1', C=10.**c, random_state=0)
lr = LogisticRegression(penalty='l1', C=10.**c, solver='liblinear',
multi_class='ovr', random_state=0)
lr.fit(X_train_std, y_train)
weights.append(lr.coef_[1])
params.append(10**c)
2 changes: 1 addition & 1 deletion ch05/ch05.ipynb
Original file line number Diff line number Diff line change
@@ -2161,7 +2161,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
},
"toc": {
"nav_menu": {},
51 changes: 13 additions & 38 deletions ch06/ch06.ipynb

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions ch06/ch06.py
Original file line number Diff line number Diff line change
@@ -204,7 +204,8 @@


pipe_lr = make_pipeline(StandardScaler(),
LogisticRegression(penalty='l2', random_state=1, solver='lbfgs'))
LogisticRegression(penalty='l2', random_state=1,
solver='lbfgs', max_iter=10000))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipe_lr,
X=X_train,
@@ -243,7 +244,7 @@
plt.legend(loc='lower right')
plt.ylim([0.8, 1.03])
plt.tight_layout()
plt.savefig('images/06_05.png', dpi=300)
#plt.savefig('images/06_05.png', dpi=300)
plt.show()


Binary file modified ch06/images/06_05.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified ch06/images/06_06.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion ch09/ch09.ipynb
Original file line number Diff line number Diff line change
@@ -1241,7 +1241,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion ch10/ch10.ipynb
Original file line number Diff line number Diff line change
@@ -1714,7 +1714,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion ch11/ch11.ipynb
Original file line number Diff line number Diff line change
@@ -1446,7 +1446,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,

0 comments on commit 1691604

Please sign in to comment.