Commit: ch04
rasbt committed Jun 16, 2019
1 parent f89a155 commit 1691604
Showing 12 changed files with 127 additions and 180 deletions.
81 changes: 30 additions & 51 deletions ch03/ch03.ipynb

Large diffs are not rendered by default.

Binary file modified ch03/images/03_06.png
120 changes: 55 additions & 65 deletions ch04/ch04.ipynb

Large diffs are not rendered by default.

42 changes: 22 additions & 20 deletions ch04/ch04.py
@@ -4,10 +4,11 @@
import pandas as pd
from io import StringIO
import sys
from sklearn.preprocessing import Imputer
from sklearn.impute import SimpleImputer
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
@@ -20,9 +21,9 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-2nd-edition
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-2nd-edition/blob/master/LICENSE.txt)

@@ -43,7 +44,7 @@

# - [Dealing with missing data](#Dealing-with-missing-data)
# - [Identifying missing values in tabular data](#Identifying-missing-values-in-tabular-data)
# - [Eliminating samples or features with missing values](#Eliminating-samples-or-features-with-missing-values)
# - [Eliminating training examples or features with missing values](#Eliminating-training-examples-or-features-with-missing-values)
# - [Imputing missing values](#Imputing-missing-values)
# - [Understanding the scikit-learn estimator API](#Understanding-the-scikit-learn-estimator-API)
# - [Handling categorical data](#Handling-categorical-data)
@@ -101,7 +102,7 @@



# ## Eliminating samples or features with missing values
# ## Eliminating training examples or features with missing values
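
# Elimination of rows or columns with missing values is typically done with
# pandas' dropna(); a minimal illustrative sketch (not part of this commit's
# diff), assuming the small example DataFrame `df` with NaN entries used
# earlier in this chapter:

df.dropna(axis=0)          # drop rows that contain at least one NaN
df.dropna(axis=1)          # drop columns that contain at least one NaN
df.dropna(how='all')       # drop only rows where all values are NaN
df.dropna(thresh=4)        # keep rows with at least 4 non-NaN values
df.dropna(subset=['C'])    # drop rows with NaN in column 'C' (column name assumed)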



@@ -159,7 +160,7 @@
# impute missing values via the column mean


imr = Imputer(missing_values='NaN', strategy='mean', axis=0)
imr = SimpleImputer(missing_values=np.nan, strategy='mean')
imr = imr.fit(df.values)
imputed_data = imr.transform(df.values)
imputed_data
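
# For comparison, the same column-mean imputation can be expressed directly in
# pandas (illustrative sketch, not part of this commit's diff; assumes the
# same `df` with NaN entries as above):

df.fillna(df.mean())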
@@ -253,15 +254,12 @@
class_le.inverse_transform(y)


# Note: The deprecation warning shown above is due to an implementation detail in scikit-learn. It was already addressed in a pull request (https://github.com/scikit-learn/scikit-learn/pull/9816), and the patch will be released with the next version of scikit-learn (i.e., v. 0.20.0).


# ## Performing one-hot encoding on nominal features



X = df[['color', 'size', 'price']].values

color_le = LabelEncoder()
X[:, 0] = color_le.fit_transform(X[:, 0])
X
@@ -270,17 +268,18 @@



ohe = OneHotEncoder(categorical_features=[0])
ohe.fit_transform(X).toarray()
X = df[['color', 'size', 'price']].values
color_ohe = OneHotEncoder()
color_ohe.fit_transform(X[:, 0].reshape(-1, 1)).toarray()




# return dense array so that we can skip
# the toarray step

ohe = OneHotEncoder(categorical_features=[0], sparse=False)
ohe.fit_transform(X)
X = df[['color', 'size', 'price']].values
c_transf = ColumnTransformer([ ('onehot', OneHotEncoder(), [0]),
('nothing', 'passthrough', [1, 2])])
c_transf.fit_transform(X).astype(float)
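
# Illustrative sketch (not part of this commit): when a pandas DataFrame is
# passed instead of a NumPy array, ColumnTransformer can also select columns
# by name; assumes the same `df` with a string-valued 'color' column and
# already-numeric 'size' and 'price' columns.

c_transf_named = ColumnTransformer([('onehot', OneHotEncoder(), ['color']),
                                    ('nothing', 'passthrough', ['size', 'price'])])
c_transf_named.fit_transform(df[['color', 'size', 'price']]).astype(float)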



@@ -301,8 +300,10 @@

# multicollinearity guard for the OneHotEncoder

ohe = OneHotEncoder(categorical_features=[0])
ohe.fit_transform(X).toarray()[:, 1:]
color_ohe = OneHotEncoder(categories='auto', drop='first')
c_transf = ColumnTransformer([ ('onehot', color_ohe, [0]),
('nothing', 'passthrough', [1, 2])])
c_transf.fit_transform(X).astype(float)
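
# Illustrative sketch (not part of this commit): the equivalent pandas
# one-liner, which likewise drops one redundant dummy column per encoded
# feature; assumes the same `df` with columns 'price', 'color', and 'size'.

pd.get_dummies(df[['price', 'color', 'size']], drop_first=True)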



@@ -404,15 +405,15 @@



LogisticRegression(penalty='l1')
LogisticRegression(penalty='l1', solver='liblinear', multi_class='ovr')


# Applied to the standardized Wine data ...




lr = LogisticRegression(penalty='l1', C=1.0)
lr = LogisticRegression(penalty='l1', C=1.0, solver='liblinear', multi_class='ovr')
# Note that C=1.0 is the default. You can increase
# or decrease it to make the regularization effect
# stronger or weaker, respectively.
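
# Illustrative sketch (not part of this commit): fitting the L1-regularized
# model above and checking its accuracy and weight sparsity; assumes the
# standardized Wine arrays X_train_std, y_train, X_test_std, and y_test
# created earlier in the chapter (the X_test_std/y_test names are assumed).

lr.fit(X_train_std, y_train)
print('Training accuracy:', lr.score(X_train_std, y_train))
print('Test accuracy:', lr.score(X_test_std, y_test))
print('Non-zero weights per class:', (lr.coef_ != 0).sum(axis=1))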
@@ -454,7 +455,8 @@

weights, params = [], []
for c in np.arange(-4., 6.):
lr = LogisticRegression(penalty='l1', C=10.**c, random_state=0)
lr = LogisticRegression(penalty='l1', C=10.**c, solver='liblinear',
multi_class='ovr', random_state=0)
lr.fit(X_train_std, y_train)
weights.append(lr.coef_[1])
params.append(10**c)
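
# Illustrative sketch (not part of this commit): plotting the regularization
# path collected above -- one weight trajectory per feature over the range of
# C values; assumes matplotlib.pyplot was imported as plt earlier in the file.

weights = np.array(weights)
for column in range(weights.shape[1]):
    plt.plot(params, weights[:, column])
plt.axhline(0, color='black', linestyle='--', linewidth=3)
plt.xscale('log')
plt.xlabel('C')
plt.ylabel('weight coefficient')
plt.show()
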
2 changes: 1 addition & 1 deletion ch05/ch05.ipynb
@@ -2161,7 +2161,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
},
"toc": {
"nav_menu": {},
51 changes: 13 additions & 38 deletions ch06/ch06.ipynb

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions ch06/ch06.py
@@ -204,7 +204,8 @@


pipe_lr = make_pipeline(StandardScaler(),
LogisticRegression(penalty='l2', random_state=1, solver='lbfgs'))
LogisticRegression(penalty='l2', random_state=1,
solver='lbfgs', max_iter=10000))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipe_lr,
X=X_train,
@@ -243,7 +244,7 @@
plt.legend(loc='lower right')
plt.ylim([0.8, 1.03])
plt.tight_layout()
plt.savefig('images/06_05.png', dpi=300)
#plt.savefig('images/06_05.png', dpi=300)
plt.show()
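
# Illustrative sketch (not part of this commit): max_iter=10000 presumably
# guards against lbfgs stopping at its default iteration cap (100) with a
# ConvergenceWarning on this data; after fitting, the number of iterations
# actually used can be read off the pipeline's final step. Assumes the
# X_train/y_train split from earlier in the chapter (y_train name assumed).

pipe_lr.fit(X_train, y_train)
print(pipe_lr.named_steps['logisticregression'].n_iter_)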


Binary file modified ch06/images/06_05.png
Binary file modified ch06/images/06_06.png
2 changes: 1 addition & 1 deletion ch09/ch09.ipynb
@@ -1241,7 +1241,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion ch10/ch10.ipynb
@@ -1714,7 +1714,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion ch11/ch11.ipynb
@@ -1446,7 +1446,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.1"
}
},
"nbformat": 4,
