Commit

Add multilayer perceptron.

trekhleb committed Dec 20, 2018
1 parent 80c5449 commit 97bbb29
Showing 2 changed files with 43 additions and 14 deletions.
32 changes: 21 additions & 11 deletions homemade/neural_network/multilayer_perceptron.py
@@ -48,14 +48,14 @@ def train(self, regularization_param=0, max_iterations=1000):
         return self.thetas, cost_histories
 
     @staticmethod
-    def gradient_descent(data, labels, initial_theta, layers, regularization_param, max_iteration):
+    def gradient_descent(data, labels, unrolled_theta, layers, regularization_param, max_iteration):
         """Gradient descent function.
         Iteratively optimizes theta model parameters.
         :param data: the set of training or test data.
         :param labels: training set outputs (0 or 1 that defines the class of an example).
-        :param initial_theta: initial model parameters.
+        :param unrolled_theta: initial model parameters.
         :param layers: model layers configuration.
         :param regularization_param: regularization parameter.
         :param max_iteration: maximum number of gradient descent steps.
@@ -67,21 +67,31 @@ def gradient_descent(data, labels, initial_theta, layers, regularization_param,
         # Launch gradient descent.
         minification_result = minimize(
             # Function that we're going to minimize.
-            lambda current_theta: MultilayerPerceptron.cost_function(
-                data, labels, current_theta, layers, regularization_param
+            lambda current_unrolled_theta: MultilayerPerceptron.cost_function(
+                data,
+                labels,
+                MultilayerPerceptron.thetas_roll(current_unrolled_theta, layers),
+                layers,
+                regularization_param
             ),
             # Initial values of model parameter.
-            initial_theta,
+            unrolled_theta,
             # We will use conjugate gradient algorithm.
             method='CG',
             # Function that will help to calculate gradient direction on each step.
-            jac=lambda current_theta: MultilayerPerceptron.gradient_step(
-                data, labels, current_theta, layers, regularization_param
+            jac=lambda current_unrolled_theta: MultilayerPerceptron.gradient_step(
+                data, labels, current_unrolled_theta, layers, regularization_param
             ),
             # Record gradient descent progress for debugging.
-            callback=lambda current_theta: cost_history.append(MultilayerPerceptron.cost_function(
-                data, labels, current_theta, layers, regularization_param
-            )),
+            callback=lambda current_unrolled_theta: cost_history.append(
+                MultilayerPerceptron.cost_function(
+                    data,
+                    labels,
+                    MultilayerPerceptron.thetas_roll(current_unrolled_theta, layers),
+                    layers,
+                    regularization_param
+                )
+            ),
             options={'maxiter': max_iteration}
         )
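Note on this hunk: scipy.optimize.minimize works on a single flat (unrolled) 1-D parameter vector, while cost_function expects per-layer weight matrices, so the fix rolls the vector back into matrices with MultilayerPerceptron.thetas_roll before each cost evaluation. thetas_roll itself is not shown in this diff; below is a minimal sketch of what it plausibly does, assuming layer l's matrix has shape (layers[l + 1], layers[l] + 1) with an extra bias column (a shape convention inferred from the traceback further down, not confirmed by this diff):

    import numpy as np

    def thetas_roll(unrolled_thetas, layers):
        """Reshape a flat parameter vector into per-layer weight matrices.

        Hypothetical sketch: assumes layer l's matrix has shape
        (layers[l + 1], layers[l] + 1), one row per output unit plus
        a bias column, packed consecutively into the flat vector.
        """
        thetas = {}
        shift = 0
        for layer_index in range(len(layers) - 1):
            in_count = layers[layer_index]
            out_count = layers[layer_index + 1]
            size = out_count * (in_count + 1)
            thetas[layer_index] = unrolled_thetas[shift:shift + size].reshape(
                (out_count, in_count + 1)
            )
            shift += size
        return thetas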

@@ -101,7 +111,7 @@ def gradient_step(data, labels, unrolled_thetas, layers, regularization_param):
         :param data: training set.
         :param labels: training set labels.
-        :param unrolled_thetas: flat vector of model parameters.
+        :param unrolled_thetas: model parameters.
         :param layers: model layers configuration.
         :param regularization_param: parameters that fights with model over-fitting.
         """
25 changes: 22 additions & 3 deletions notebooks/neural_network/multilayer_perceptron_demo.ipynb
@@ -489,15 +489,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 127,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[1.54168371e-06 0.00000000e+00 0.00000000e+00 ... 2.03102558e-01\n",
-      " 1.36934643e-01 1.77471528e-01]\n"
+      "[-0.05339685  0.06088466 -0.07526288 ...  0.08618594 -0.04829076\n",
+      "  0.0289159 ]\n"
      ]
     },
+    {
+     "ename": "ValueError",
+     "evalue": "Scalar operands are not allowed, use '*' instead",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "ValueError                                Traceback (most recent call last)",
+      "<ipython-input-127-6e0a128d10e2> in <module>",
+      "---> 15 (thetas, costs) = multilayer_perceptron.train(regularization_param, max_iterations)",
+      "~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py in train(self, regularization_param, max_iterations)",
+      "---> 45 max_iterations,",
+      "~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py in gradient_descent(data, labels, initial_theta, layers, regularization_param, max_iteration)",
+      "---> 85 options={'maxiter': max_iteration}",
+      "~/Projects/homemade-machine-learning/env/lib/python3.7/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)",
+      "---> 595 return _minimize_cg(fun, x0, args, jac, callback, **options)",
+      "~/Projects/homemade-machine-learning/env/lib/python3.7/site-packages/scipy/optimize/optimize.py in _minimize_cg(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)",
+      "---> 1270 old_fval = f(xk)",
+      "~/Projects/homemade-machine-learning/env/lib/python3.7/site-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)",
+      "---> 293 return function(*(wrapper_args + args))",
+      "~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py in <lambda>(current_theta)",
+      "---> 71 data, labels, current_theta, layers, regularization_param",
+      "~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py in cost_function(data, labels, thetas, layers, regularization_param)",
+      "---> 143 predictions = MultilayerPerceptron.feedforward_propagation(data, thetas, layers)",
+      "~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py in feedforward_propagation(data, thetas, layers)",
+      "---> 187 out_layer_activation = sigmoid(in_layer_activation @ theta.T)",
+      "ValueError: Scalar operands are not allowed, use '*' instead"
+     ]
+    }
],
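The committed traceback shows exactly the failure this diff fixes: the old lambda passed the raw 1-D vector straight through to cost_function, so inside feedforward_propagation the lookup thetas[layer_index] returned a NumPy scalar instead of a weight matrix, and the matrix-multiply operator @ refuses scalar operands. A standalone sketch reproducing the error (illustrative, not from the repo; the exact message wording varies across NumPy versions):

    import numpy as np

    activations = np.ones((5, 4))         # e.g. 5 examples, 3 features + bias column
    unrolled_theta = np.random.rand(10)   # flat 1-D parameter vector

    theta = unrolled_theta[0]             # indexing a 1-D array yields a scalar
    activations @ theta.T                 # raises ValueError: Scalar operands
                                          # are not allowed, use '*' instead

Rolling the vector into per-layer matrices first, as the new lambdas do, restores proper 2-D operands for @.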
