diff --git a/homemade/neural_network/multilayer_perceptron.py b/homemade/neural_network/multilayer_perceptron.py index ddd3a61..407beeb 100644 --- a/homemade/neural_network/multilayer_perceptron.py +++ b/homemade/neural_network/multilayer_perceptron.py @@ -48,14 +48,14 @@ def train(self, regularization_param=0, max_iterations=1000): return self.thetas, cost_histories @staticmethod - def gradient_descent(data, labels, initial_theta, layers, regularization_param, max_iteration): + def gradient_descent(data, labels, unrolled_theta, layers, regularization_param, max_iteration): """Gradient descent function. Iteratively optimizes theta model parameters. :param data: the set of training or test data. :param labels: training set outputs (0 or 1 that defines the class of an example). - :param initial_theta: initial model parameters. + :param unrolled_theta: initial model parameters. :param layers: model layers configuration. :param regularization_param: regularization parameter. :param max_iteration: maximum number of gradient descent steps. @@ -67,21 +67,31 @@ def gradient_descent(data, labels, initial_theta, layers, regularization_param, # Launch gradient descent. minification_result = minimize( # Function that we're going to minimize. - lambda current_theta: MultilayerPerceptron.cost_function( - data, labels, current_theta, layers, regularization_param + lambda current_unrolled_theta: MultilayerPerceptron.cost_function( + data, + labels, + MultilayerPerceptron.thetas_roll(current_unrolled_theta, layers), + layers, + regularization_param ), # Initial values of model parameter. - initial_theta, + unrolled_theta, # We will use conjugate gradient algorithm. method='CG', # Function that will help to calculate gradient direction on each step. - jac=lambda current_theta: MultilayerPerceptron.gradient_step( - data, labels, current_theta, layers, regularization_param + jac=lambda current_unrolled_theta: MultilayerPerceptron.gradient_step( + data, labels, current_unrolled_theta, layers, regularization_param ), # Record gradient descent progress for debugging. - callback=lambda current_theta: cost_history.append(MultilayerPerceptron.cost_function( - data, labels, current_theta, layers, regularization_param - )), + callback=lambda current_unrolled_theta: cost_history.append( + MultilayerPerceptron.cost_function( + data, + labels, + MultilayerPerceptron.thetas_roll(current_unrolled_theta, layers), + layers, + regularization_param + ) + ), options={'maxiter': max_iteration} ) @@ -101,7 +111,7 @@ def gradient_step(data, labels, unrolled_thetas, layers, regularization_param): :param data: training set. :param labels: training set labels. - :param unrolled_thetas: flat vector of model parameters. + :param unrolled_thetas: model parameters. :param layers: model layers configuration. :param regularization_param: parameters that fights with model over-fitting. """ diff --git a/notebooks/neural_network/multilayer_perceptron_demo.ipynb b/notebooks/neural_network/multilayer_perceptron_demo.ipynb index 554c87f..3b82c97 100644 --- a/notebooks/neural_network/multilayer_perceptron_demo.ipynb +++ b/notebooks/neural_network/multilayer_perceptron_demo.ipynb @@ -489,15 +489,34 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 127, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[1.54168371e-06 0.00000000e+00 0.00000000e+00 ... 2.03102558e-01\n", - " 1.36934643e-01 1.77471528e-01]\n" + "[-0.05339685 0.06088466 -0.07526288 ... 0.08618594 -0.04829076\n", + " 0.0289159 ]\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Scalar operands are not allowed, use '*' instead", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;31m# Train neural network.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mthetas\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcosts\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmultilayer_perceptron\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mregularization_param\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_iterations\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, regularization_param, max_iterations)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0mregularization_param\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m \u001b[0mmax_iterations\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 46\u001b[0m )\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36mgradient_descent\u001b[0;34m(data, labels, initial_theta, layers, regularization_param, max_iteration)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_theta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlayers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mregularization_param\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m )),\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0moptions\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'maxiter'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mmax_iteration\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m )\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/homemade-machine-learning/env/lib/python3.7/site-packages/scipy/optimize/_minimize.py\u001b[0m in \u001b[0;36mminimize\u001b[0;34m(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)\u001b[0m\n\u001b[1;32m 593\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_minimize_powell\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfun\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mmeth\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'cg'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 595\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_minimize_cg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfun\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjac\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 596\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mmeth\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'bfgs'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 597\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_minimize_bfgs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfun\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjac\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/homemade-machine-learning/env/lib/python3.7/site-packages/scipy/optimize/optimize.py\u001b[0m in \u001b[0;36m_minimize_cg\u001b[0;34m(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)\u001b[0m\n\u001b[1;32m 1268\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1269\u001b[0m \u001b[0;31m# Sets the initial step guess to dx ~ 1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1270\u001b[0;31m \u001b[0mold_fval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1271\u001b[0m \u001b[0mold_old_fval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mold_fval\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinalg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgfk\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1272\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/homemade-machine-learning/env/lib/python3.7/site-packages/scipy/optimize/optimize.py\u001b[0m in \u001b[0;36mfunction_wrapper\u001b[0;34m(*wrapper_args)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfunction_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mwrapper_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0mncalls\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 293\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwrapper_args\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 294\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mncalls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunction_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36m\u001b[0;34m(current_theta)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;31m# Function that we're going to minimize.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m lambda current_theta: MultilayerPerceptron.cost_function(\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_theta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlayers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mregularization_param\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 72\u001b[0m ),\n\u001b[1;32m 73\u001b[0m \u001b[0;31m# Initial values of model parameter.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36mcost_function\u001b[0;34m(data, labels, thetas, layers, regularization_param)\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0;31m# Feedforward the neural network.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 143\u001b[0;31m \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMultilayerPerceptron\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeedforward_propagation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthetas\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlayers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[0;31m# Compute the cost.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/homemade-machine-learning/homemade/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36mfeedforward_propagation\u001b[0;34m(data, thetas, layers)\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlayer_index\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_layers\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0mtheta\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mthetas\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlayer_index\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m \u001b[0mout_layer_activation\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msigmoid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0min_layer_activation\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mtheta\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 188\u001b[0m \u001b[0;31m# Add bias units.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0mout_layer_activation\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_examples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout_layer_activation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Scalar operands are not allowed, use '*' instead" ] } ],