Skip to content

Commit

Permalink
update parsing scripts (rasbt#43)
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt authored Sep 16, 2017
1 parent 8c54875 commit 22c3431
Show file tree
Hide file tree
Showing 30 changed files with 139 additions and 252 deletions.
7 changes: 7 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@ addons:
install:
- sudo apt-get update
- source .travis/install_travis_env.sh

before_script: # configure a headless display to test plot generation
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
- sleep 3 # give xvfb some time to start

script:
- bash .travis/run_travis_tests.sh
#after_success:
# - if [[ "${COVERAGE}" == "true" ]]; then coveralls; codecov; else echo "failed"; fi

notifications:
email:
recipients:
Expand Down
15 changes: 11 additions & 4 deletions code/.convert_notebook_to_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import argparse
import os
import subprocess
import textwrap


def convert(input_path, output_path):
Expand All @@ -26,15 +25,23 @@ def cleanup(path):

clean_content = []
imports = []
existing_imports = set()
with open(path, 'r') as f:
next(f)
next(f)
for line in f:
line = line.rstrip(' ')
if line.startswith(skip_lines_startwith):
continue
if line.startswith('import') or (
'from' in line and 'import' in line):
imports.append(line)
if line.startswith('import ') or (
'from ' in line and 'import ' in line):
if 'from __future__ import print_function' in line:
if line != imports[0]:
imports.insert(0, line)
else:
if line.strip() not in existing_imports:
imports.append(line)
existing_imports.add(line.strip())
else:
clean_content.append(line)

Expand Down
2 changes: 1 addition & 1 deletion code/ch02/ch02.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1363,7 +1363,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"metadata": {},
"outputs": [
{
Expand Down
1 change: 0 additions & 1 deletion code/ch02/ch02.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
Expand Down
11 changes: 10 additions & 1 deletion code/ch03/ch03.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1743,7 +1743,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -1758,6 +1758,15 @@
"source": [
"! python ../.convert_notebook_to_script.py --input ch03.ipynb --output ch03.py"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
7 changes: 0 additions & 7 deletions code/ch03/ch03.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,9 @@
from sklearn.metrics import accuracy_score
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from pydotplus import graph_from_dot_data
from sklearn.tree import export_graphviz
Expand Down
Binary file added code/ch03/tree.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion code/ch04/ch04.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2522,7 +2522,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand Down
7 changes: 1 addition & 6 deletions code/ch04/ch04.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,19 @@
from io import StringIO
import sys
from sklearn.preprocessing import Imputer
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.base import clone
from itertools import combinations
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
importances = forest.feature_importances_
from sklearn.feature_selection import SelectFromModel

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
Expand Down Expand Up @@ -592,6 +586,7 @@ def _calc_score(self, X_train, y_train, X_test, y_test, indices):
random_state=1)

forest.fit(X_train, y_train)
importances = forest.feature_importances_

indices = np.argsort(importances)[::-1]

Expand Down
2 changes: 1 addition & 1 deletion code/ch05/ch05.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2125,7 +2125,7 @@
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook ch05.ipynb to script\n",
"[NbConvertApp] Writing 27719 bytes to ch05.py\n"
"[NbConvertApp] Writing 27705 bytes to ch05.py\n"
]
}
],
Expand Down
13 changes: 0 additions & 13 deletions code/ch05/ch05.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,11 @@
from matplotlib.colors import ListedColormap
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from scipy.spatial.distance import pdist, squareform
from scipy import exp
from scipy.linalg import eigh
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.decomposition import PCA
from sklearn.datasets import make_circles
from scipy.spatial.distance import pdist, squareform
from scipy import exp
from scipy.linalg import eigh
import numpy as np
from sklearn.decomposition import KernelPCA

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
Expand Down Expand Up @@ -922,8 +914,3 @@ def project_x(x_new, X, gamma, alphas, lambdas):








2 changes: 1 addition & 1 deletion code/ch07/ch07.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1534,7 +1534,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [
{
Expand Down
9 changes: 0 additions & 9 deletions code/ch07/ch07.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,16 @@
import math
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.externals import six
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import numpy as np
import operator
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
Expand All @@ -29,13 +25,8 @@
from itertools import product
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
Expand Down
2 changes: 1 addition & 1 deletion code/ch08/ch08.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1800,7 +1800,7 @@
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook ch08.ipynb to script\n",
"[NbConvertApp] Writing 24627 bytes to ch08.py\n"
"[NbConvertApp] Writing 24613 bytes to ch08.py\n"
]
}
],
Expand Down
17 changes: 0 additions & 17 deletions code/ch08/ch08.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
import time
import pyprind
import pandas as pd
import os
import numpy as np
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
Expand All @@ -21,19 +18,10 @@
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import numpy as np
import re
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import SGDClassifier
import pyprind
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
Expand Down Expand Up @@ -726,8 +714,3 @@ def get_minibatch(doc_stream, size):








6 changes: 3 additions & 3 deletions code/ch10/ch10.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@
" xticklabels=cols)\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig('images/10_04.png', dpi=300)\n",
"# plt.savefig('images/10_04.png', dpi=300)\n",
"plt.show()"
]
},
Expand Down Expand Up @@ -712,7 +712,7 @@
"plt.xlabel('Average number of rooms [RM] (standardized)')\n",
"plt.ylabel('Price in $1000s [MEDV] (standardized)')\n",
"\n",
"plt.savefig('images/10_06.png', dpi=300)\n",
"#plt.savefig('images/10_06.png', dpi=300)\n",
"plt.show()"
]
},
Expand Down Expand Up @@ -1715,7 +1715,7 @@
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook ch10.ipynb to script\n",
"[NbConvertApp] Writing 18556 bytes to ch10.py\n"
"[NbConvertApp] Writing 18559 bytes to ch10.py\n"
]
}
],
Expand Down
6 changes: 2 additions & 4 deletions code/ch10/ch10.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.model_selection import train_test_split
import numpy as np
import scipy as sp
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
Expand Down Expand Up @@ -177,7 +175,7 @@
xticklabels=cols)

plt.tight_layout()
plt.savefig('images/10_04.png', dpi=300)
# plt.savefig('images/10_04.png', dpi=300)
plt.show()


Expand Down Expand Up @@ -262,7 +260,7 @@ def lin_regplot(X, y, model):
plt.xlabel('Average number of rooms [RM] (standardized)')
plt.ylabel('Price in $1000s [MEDV] (standardized)')

plt.savefig('images/10_06.png', dpi=300)
#plt.savefig('images/10_06.png', dpi=300)
plt.show()


Expand Down
1 change: 0 additions & 1 deletion code/ch11/ch11.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from matplotlib import cm
from sklearn.metrics import silhouette_samples
import pandas as pd
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import dendrogram
Expand Down
14 changes: 7 additions & 7 deletions code/ch12/ch12.ipynb

Large diffs are not rendered by default.

7 changes: 1 addition & 6 deletions code/ch12/ch12.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@
import gzip
import shutil
import matplotlib.pyplot as plt
import numpy as np
import numpy as np
import sys
import numpy as np
import matplotlib.pyplot as plt

# *Python Machine Learning 2nd Edition* by [Sebastian Raschka](https://sebastianraschka.com), Packt Publishing Ltd. 2017
#
Expand Down Expand Up @@ -245,7 +240,7 @@ class NeuralNetMLP(object):
Learning rate.
shuffle : bool (default: True)
Shuffles training data every epoch if True to prevent cycles.
minibatche_size : int (default: 1)
minibatch_size : int (default: 1)
Number of training samples per minibatch.
seed : int (default: None)
Random seed for initializing weights and shuffling.
Expand Down
Binary file modified code/ch12/images/12_03.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion code/ch12/neuralnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class NeuralNetMLP(object):
Learning rate.
shuffle : bool (default: True)
Shuffles training data every epoch if True to prevent cycles.
minibatche_size : int (default: 1)
minibatch_size : int (default: 1)
Number of training samples per minibatch.
seed : int (default: None)
Random seed for initializing weights and shuffling.
Expand Down
Loading

0 comments on commit 22c3431

Please sign in to comment.