# -*- coding: utf-8 -*-
"""
cyipopt: Python wrapper for the Ipopt optimization package, written in Cython.

Copyright (C) 2012-2015 Amit Aides
Copyright (C) 2015-2017 Matthias Kümmerer
Copyright (C) 2017-2023 cyipopt developers

License: EPL 2.0

Usage::

    $ python lasso.py
    $ python lasso.py -p
    $ python lasso.py --plot
"""
#
# This script runs Ipopt to solve least squares regression with L1
# regularization, the "lasso". The inputs are the data matrix A (in which
# each row is an example vector), the vector of regression outputs y, and
# the penalty parameter lambda, a number greater than zero. The output is
# the estimated vector of regression coefficients.
#
# Adapted from MATLAB code by Peter Carbonetto.
#
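#
# Background on the formulation used below: the lasso objective
#
#     minimize_w  (1/2)*||y - A*w||^2 + lambda*||w||_1
#
# is non-smooth because of the L1 term, so it is rewritten in the standard
# smooth form with auxiliary variables u that bound |w| elementwise:
#
#     minimize_{w,u}  (1/2)*||y - A*w||^2 + lambda*sum(u)
#     subject to      u + w >= 0,  u - w >= 0
#
# At the optimum u_i = |w_i|, so both problems have the same minimizer in w.
# This is why the problem below has 2*m variables (w stacked with u) and
# 2*m linear inequality constraints.
#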
import numpy as np

import cyipopt


class lasso(cyipopt.Problem):
    def __init__(self, A, y):
        self._A = A
        self._y = y
        self._m = A.shape[1]

        #
        # The constraint functions are bounded from below by zero.
        #
        cl = np.zeros(2*self._m)

        super(lasso, self).__init__(
            2*self._m,
            2*self._m,
            cl=cl
        )

        #
        # Set solver options. 'derivative_test' makes Ipopt check the
        # hand-coded first and second derivatives against finite differences;
        # the two *_constant options tell Ipopt that the constraint Jacobian
        # and the Lagrangian Hessian are constant, so each is evaluated only
        # once.
        #
        self.add_option('derivative_test', 'second-order')
        self.add_option('jac_d_constant', 'yes')
        self.add_option('hessian_constant', 'yes')
        self.add_option('mu_strategy', 'adaptive')
        self.add_option('max_iter', 100)
        self.add_option('tol', 1e-8)
    def solve(self, _lambda):
        #
        # Start from w = 0, u = 1.
        #
        x0 = np.concatenate((np.zeros(self._m), np.ones(self._m)))
        self._lambda = _lambda

        x, info = super(lasso, self).solve(x0)

        return x[:self._m]
    def objective(self, x):
        w = x[:self._m].reshape((-1, 1))
        u = x[self._m:].reshape((-1, 1))

        return (np.linalg.norm(self._y - np.dot(self._A, w))**2/2 +
                self._lambda*np.sum(u))
    def constraints(self, x):
        w = x[:self._m].reshape((-1, 1))
        u = x[self._m:].reshape((-1, 1))

        return np.vstack((u + w, u - w))
    def gradient(self, x):
        w = x[:self._m].reshape((-1, 1))

        g = np.vstack(
            (np.dot(-self._A.T, self._y - np.dot(self._A, w)),
             self._lambda*np.ones((self._m, 1)))
        )

        return g
    def jacobianstructure(self):
        #
        # Return the row and column indices of the nonzero entries of the
        # constraint Jacobian. Its sparsity pattern is that of the block
        # matrix [[I, I], [-I, I]].
        #
        return np.nonzero(np.tile(np.eye(self._m), (2, 2)))
    def jacobian(self, x):
        #
        # The constraints are linear, so the Jacobian is the constant block
        # matrix [[I, I], [-I, I]]; return its nonzero entries in the order
        # given by jacobianstructure().
        #
        I = np.eye(self._m)

        J = np.vstack((np.hstack((I, I)), np.hstack((-I, I))))

        row, col = self.jacobianstructure()

        return J[row, col]
    def hessianstructure(self):
        #
        # Return the row and column indices of the nonzero entries of the
        # lower triangle of the Hessian. Only the w-block (the upper-left
        # m-by-m block) is nonzero.
        #
        h = np.zeros((2*self._m, 2*self._m))
        h[:self._m, :self._m] = np.tril(np.ones((self._m, self._m)))

        return np.nonzero(h)
    def hessian(self, x, lagrange, obj_factor):
        #
        # The Hessian of the objective with respect to w is A^T A; the
        # u-block is zero. The constraints are linear, so they contribute
        # nothing and the lagrange multipliers are unused.
        #
        H = np.zeros((2*self._m, 2*self._m))
        H[:self._m, :self._m] = np.tril(np.dot(self._A.T, self._A))

        row, col = self.hessianstructure()

        return obj_factor*H[row, col]

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--plot', help='Plot results with matplotlib',
                        action='store_true')
    args = parser.parse_args()
    #
    # n    - Number of training examples
    # e    - Std. dev. of the noise in the outputs
    # beta - "True" regression coefficients
    #
    n = 100
    e = 1
    beta = np.array((0, 0, 2, -4, 0, 0, -1, 3),
                    dtype=np.float64).reshape((-1, 1))
    #
    # Set the random number generator seed.
    #
    seed = 7
    np.random.seed(seed)
    #
    # CREATE DATA SET.
    # Generate the input vectors from the standard normal, and generate the
    # outputs from the linear regression model with additive Gaussian noise.
    # The variable "beta" is the set of true regression coefficients.
    #
    # A     - The n x m matrix of examples
    # noise - Noise in the outputs
    # y     - The regression outputs
    #
    m = len(beta)
    A = np.random.randn(n, m)
    noise = e * np.random.randn(n, 1)
    y = np.dot(A, beta) + noise
    #
    # COMPUTE SOLUTION WITH IPOPT.
    # Compute the L1-regularized least squares estimator over a range of
    # regularization levels.
    #
    # _lambdas - Levels of L1 regularization to sweep
    #
    problem = lasso(A, y)

    LAMBDA_SAMPLES = 30
    _lambdas = np.logspace(0, 2.6, LAMBDA_SAMPLES)
    estim_betas = np.zeros((LAMBDA_SAMPLES, beta.size))

    for i, _lambda in enumerate(_lambdas):
        estim_betas[i, :] = problem.solve(_lambda)
    if args.plot:
        import matplotlib.pyplot as plt

        plt.plot(_lambdas, estim_betas)
        plt.xlabel('L1 Regularization Parameter')
        plt.ylabel('Estimated Regression Coefficients')
        plt.title('Lasso Solution as a Function of the Regularization '
                  'Parameter')
        plt.show()
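
    #
    # Quick sanity check: the L1 penalty shrinks coefficients toward zero as
    # lambda grows, so the estimate at the largest lambda in the sweep should
    # be heavily shrunk relative to the true beta.
    #
    print('estimated beta at largest lambda:', estim_betas[-1, :])
    print('true beta:', beta.ravel())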