Implemented stepwise EM
charlienash committed Jun 14, 2016
1 parent 93bde9e commit 52fc9af
Showing 5 changed files with 73 additions and 64 deletions.
Binary file modified __pycache__/pyMM.cpython-34.pyc
Binary file modified __pycache__/util.cpython-34.pyc
23 changes: 13 additions & 10 deletions demo.py
@@ -1,30 +1,33 @@
 import numpy as np
 from pyMM import GMM, SphericalGMM, DiagonalGMM, MPPCA, MFA
-from util import _generate_mixture_data, plot_density
+from util import _generate_mixture_data, plot_density, _gen_low_rank_data


 def main():
-    n_examples = 2500
-    data_dim = 500
+    n_examples = 2000
+    data_dim = 1000
     n_components = 1
-    X = _generate_mixture_data(data_dim, n_components, n_examples)
+    rank = 50
+    # X = _generate_mixture_data(data_dim, n_components, n_examples)
+    X = _gen_low_rank_data(data_dim, rank, n_examples)

     # Obscure data
     r = np.random.rand(n_examples, data_dim)
     X_miss = X.copy()
-    X_miss[r > 0.7] = np.nan
+    X_miss[r > 0.6] = np.nan

     # Initialize model
-    gmm = GMM(n_components=n_components, robust=True, SMALL=1e-5)
+    # gmm = GMM(n_components=n_components)
     # gmm = SphericalGMM(n_components=n_components, robust=True)
     # gmm = DiagonalGMM(n_components=n_components)
     # gmm = MPPCA(n_components=n_components, latent_dim=1)
-    # gmm = MFA(n_components=n_components, latent_dim=50)
+    gmm = MFA(n_components=n_components, latent_dim=40, tol=1e-3)

     # Fit GMM
-    # gmm.stepwise_fit(X, init_method='kmeans')
-    gmm.fit(X, init_method='kmeans')
-    # gmm.fit(X_miss, init_method='kmeans')
+    gmm.stepwise_fit(X_miss, init_method='kmeans', batch_size=200,
+                     step_alpha=0.7)
+    # gmm.fit(X, init_method='kmeans')
+    gmm.fit(X_miss, init_method='kmeans')

     # Plot results
     # plot_density(gmm, X=X, n_grid=50)
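A side note on the new demo (not part of the diff): with the mask r > 0.6, each entry of X is hidden independently with probability 0.4, so roughly 40% of the matrix becomes NaN, up from 30% under the old r > 0.7 threshold. A quick self-contained check of that masking pattern:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(2000, 1000)

# Hide each entry independently with probability 1 - 0.6 = 0.4.
r = rng.rand(*X.shape)
X_miss = X.copy()
X_miss[r > 0.6] = np.nan
print(np.isnan(X_miss).mean())  # ~0.4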
49 changes: 22 additions & 27 deletions pyMM.py
@@ -115,7 +115,7 @@ def _e_step_no_miss(self, X, params):
         # Get params
         mu_list = params['mu_list']
         components = params['components']
-        n_examples, data_dim = X.shape
+        n_examples = X.shape[0]

         # Get Sigma from params
         Sigma_list = self._params_to_Sigma(params)
@@ -124,7 +124,6 @@ def _e_step_no_miss(self, X, params):
         log_r = np.zeros([n_examples, self.n_components])
         for k, mu, Sigma in zip(range(self.n_components), mu_list,
                                 Sigma_list):
-            # print(np.linalg.eig(Sigma))
             try:
                 log_r[:, k] = multivariate_normal.logpdf(X, mu, Sigma)
             except (np.linalg.linalg.LinAlgError, ValueError):
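The except branch is collapsed in this view, so the following is only a hedged sketch of a common fallback when Sigma is not positive definite (plausibly what the robust/SMALL options seen in demo.py control, though the commit does not show this): retry with a small ridge on the diagonal.

import numpy as np
from scipy.stats import multivariate_normal

def logpdf_with_ridge(X, mu, Sigma, small=1e-5):
    # Hypothetical helper, not pyMM's actual fallback code.
    try:
        return multivariate_normal.logpdf(X, mu, Sigma)
    except (np.linalg.LinAlgError, ValueError):
        dim = Sigma.shape[0]
        return multivariate_normal.logpdf(X, mu, Sigma + small * np.eye(dim))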
@@ -373,22 +372,22 @@ def fit(self, X, params_init=None, init_method='kmeans'):
             Matrix of training data, where nExamples is the number of
             examples and nFeatures is the number of features.
         """
-        n_examples, data_dim = np.shape(X)
-        self.data_dim = data_dim
-        self.n_examples = n_examples
         if np.isnan(X).any():
             self.missing_data = True
         else:
             self.missing_data = False

+        # Check for missing values and remove if whole row is missing
+        X = X[~np.isnan(X).all(axis=1), :]
+        n_examples, data_dim = np.shape(X)
+        self.data_dim = data_dim
+        self.n_examples = n_examples
+
         if params_init is None:
             params = self._init_params(X, init_method)
         else:
             params = params_init

-        # Check for missing values and remove if whole row is missing
-        X = X[~np.isnan(X).all(axis=1), :]
-
         oldL = -np.inf
         for i in range(self.max_iter):
@@ -398,7 +397,7 @@ def fit(self, X, params_init=None, init_method='kmeans'):
             # Evaluate likelihood
             ll = sample_ll.mean() / self.data_dim
             if self.verbose:
-                print("Iter {:d} NLL: {:.3f} Change: {:.3f}".format(i,
+                print("Iter {:d} NLL: {:.4f} Change: {:.4f}".format(i,
                       -ll, -(ll-oldL)), flush=True)

             # Break if change in likelihood is small
@@ -421,7 +420,7 @@ def fit(self, X, params_init=None, init_method='kmeans'):
         self.isFitted = True

     def stepwise_fit(self, X, params_init=None, init_method='kmeans',
-                     batch_size=500, step_alpha=0.75):
+                     batch_size=250, step_alpha=0.7):
         """ Fit the model using EM with data X.
         Args
@@ -430,22 +429,22 @@ def stepwise_fit(self, X, params_init=None, init_method='kmeans',
             Matrix of training data, where nExamples is the number of
             examples and nFeatures is the number of features.
         """
-        n_examples, data_dim = np.shape(X)
-        self.data_dim = data_dim
-        self.n_examples = n_examples
         if np.isnan(X).any():
             self.missing_data = True
         else:
             self.missing_data = False

+        # Check for missing values and remove if whole row is missing
+        X = X[~np.isnan(X).all(axis=1), :]
+        n_examples, data_dim = np.shape(X)
+        self.data_dim = data_dim
+        self.n_examples = n_examples
+
         if params_init is None:
             params = self._init_params(X, init_method)
         else:
             params = params_init

-        # Check for missing values and remove if whole row is missing
-        X = X[~np.isnan(X).all(axis=1), :]
-
         # Get batch indices
         batch_id = np.hstack([np.arange(0, n_examples, batch_size),
                               n_examples])
@@ -465,7 +464,7 @@ def stepwise_fit(self, X, params_init=None, init_method='kmeans',
             # Evaluate likelihood
             ll = sample_ll.mean() / self.data_dim
             if self.verbose:
-                print("Iter {:d} NLL: {:.3f} Change: {:.3f}".format(i,
+                print("Iter {:d} NLL: {:.4f} Change: {:.4f}".format(i,
                       -ll, -(ll-oldL)), flush=True)

             # Break if change in likelihood is small
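For context, stepwise EM (Cappé & Moulines, 2009; Liang & Klein, 2009) runs the E-step on minibatches and blends each batch's expected sufficient statistics into a running average before every M-step. The loop body of stepwise_fit is collapsed in this view, so the following is only a minimal sketch of the idea, assuming a step size that decays as eta_t = (t + 2)**(-step_alpha); the e_step and m_step callables and the dict-of-arrays statistics are stand-ins, not pyMM's API.

import numpy as np

def stepwise_em_sketch(X, e_step, m_step, params, batch_size=250,
                       step_alpha=0.7, n_passes=10):
    # Batch boundaries, mirroring the batch_id construction above.
    n_examples = X.shape[0]
    batch_id = np.hstack([np.arange(0, n_examples, batch_size), n_examples])

    ss_avg = None  # running average of expected sufficient statistics
    t = 0          # global update counter across passes
    for _ in range(n_passes):
        for start, stop in zip(batch_id[:-1], batch_id[1:]):
            ss_batch = e_step(X[start:stop], params)  # dict of arrays
            eta = (t + 2) ** (-step_alpha)            # decaying step size
            if ss_avg is None:
                ss_avg = ss_batch
            else:
                ss_avg = {key: (1 - eta) * ss_avg[key] + eta * ss_batch[key]
                          for key in ss_avg}
            params = m_step(ss_avg, params)  # M-step on the blended statistics
            t += 1
    return params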
@@ -812,8 +811,7 @@ def _e_step_no_miss(self, X, params):
             ss_list.append(np.sum(r*(s1 + s2 + s3)))

         # Store sufficient statistics in dictionary
-        ss = {'n_examples': n_examples,
-              'r_list': r_list,
+        ss = {'r_list': r_list,
               'x_list': x_list,
               'xz_list': xz_list,
               'z_list': z_list,
@@ -985,8 +983,7 @@ def _e_step_miss(self, X, params):
             ss_list.append(ss_tot)

         # Store sufficient statistics in dictionary
-        ss = {'n_examples': n_examples,
-              'r_list': r_list,
+        ss = {'r_list': r_list,
               'x_list': x_list,
               'xz_list': xz_list,
               'z_list': z_list,
@@ -1014,7 +1011,7 @@ def _m_step(self, ss, params):
         params : dict
         """
-        n_examples = ss['n_examples']
+        n_examples = self.n_examples
         r_list = ss['r_list']
         x_list = ss['x_list']
         z_list = ss['z_list']
@@ -1204,8 +1201,7 @@ def _e_step_no_miss(self, X, params):
             zx_list.append(np.sum(zx*r[:, np.newaxis, np.newaxis], axis=0))

         # Store sufficient statistics in dictionary
-        ss = {'n_examples': n_examples,
-              'r_list': r_list,
+        ss = {'r_list': r_list,
               'x_list': x_list,
               'xx_list': xx_list,
               'xz_list': xz_list,
@@ -1383,8 +1379,7 @@ def _e_step_miss(self, X, params):
             zx_list.append(zx_tot)

         # Store sufficient statistics in dictionary
-        ss = {'n_examples': n_examples,
-              'r_list': r_list,
+        ss = {'r_list': r_list,
               'x_list': x_list,
               'xx_list': xx_list,
               'xz_list': xz_list,
@@ -1413,7 +1408,7 @@ def _m_step(self, ss, params):
         params : dict
         """
-        n_examples = ss['n_examples']
+        n_examples = self.n_examples
         r_list = ss['r_list']
         x_list = ss['x_list']
         xx_list = ss['xx_list']
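A plausible reading of the recurring ss changes above (my inference; the commit itself does not say): once stepwise_fit starts blending sufficient statistics across minibatches, a per-batch 'n_examples' entry would get blended along with them and stop counting anything meaningful, so the M-step now reads the example count from the model instead. A toy illustration of the pitfall:

import numpy as np

# Blending two batches' statistics also blends their example counts,
# yielding a normalizer that matches neither batch nor the full data set.
ss_a = {'n_examples': 200, 'r_sum': np.array([120.0, 80.0])}
ss_b = {'n_examples': 50, 'r_sum': np.array([30.0, 20.0])}

eta = 0.5
blended = {key: (1 - eta) * ss_a[key] + eta * ss_b[key] for key in ss_a}
print(blended['n_examples'])  # 125.0 -- not a real count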
65 changes: 38 additions & 27 deletions util.py
@@ -3,29 +3,29 @@
 from matplotlib.patches import Ellipse

 def _mv_gaussian_pdf(X, mu, Sigma):
-    """
+    """
     Get Gaussian probability density for given data points and parameters.
-    """
+    """
     return np.exp(_mv_gaussian_log_pdf(X, mu, Sigma))

 def _mv_gaussian_log_pdf(X, mu, Sigma):
-    """
-    Get Gaussian log probability density for given data points and
+    """
+    Get Gaussian log probability density for given data points and
     parameters.
-    """
+    """
     d = mu.size
     dev = X - mu
     Sigma_inv = np.linalg.inv(Sigma)
     log_det = np.log(np.linalg.det(Sigma))
     maha = np.diag(dev.dot(Sigma_inv).dot(dev.T))
     return -0.5 * (d*np.log(2*np.pi) + log_det + maha)

 def _get_rand_cov_mat(dim):
     Sigma = np.random.randn(dim, dim)
     Sigma = Sigma.dot(Sigma.T)
     Sigma = Sigma + dim*np.eye(dim)
     return Sigma

 def _generate_mixture_data(dim, n_components, n_samples):
     components = np.random.rand(n_components)
     components = components / np.sum(components)
@@ -35,14 +35,25 @@ def _generate_mixture_data(dim, n_components, n_samples):
     samples = np.zeros([n_samples, dim])
     for n in range(n_samples):
         r = np.random.rand(1)
-        z = np.argmin(r > components_cumsum)
-        samples[n] = np.random.multivariate_normal(mu_list[z], Sigma_list[z])
+        z = np.argmin(r > components_cumsum)
+        samples[n] = np.random.multivariate_normal(mu_list[z], Sigma_list[z])
     return samples


+def _gen_low_rank_data(dim, rank, n_samples):
+    sigma = 1.
+    rng = np.random.RandomState(42)
+    U, _, _ = np.linalg.svd(rng.randn(dim, dim))
+    X = np.dot(rng.randn(n_samples, rank), U[:, :rank].T)
+
+    # Adding heteroscedastic noise
+    sigmas = sigma * rng.rand(dim) + sigma / 2.
+    X_hetero = X + rng.randn(n_samples, dim) * sigmas
+    return X_hetero
+
 def plot_cov_ellipse(cov, pos, nstd=2, ax=None, **kwargs):
     """
     Plots an `nstd` sigma error ellipse based on the specified covariance
-    matrix (`cov`). Additional keyword arguments are passed on to the
+    matrix (`cov`). Additional keyword arguments are passed on to the
     ellipse patch artist.

     Parameters
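The new _gen_low_rank_data helper draws rank-dimensional latent samples, embeds them along the first rank left singular vectors of a random matrix, and adds per-feature Gaussian noise with standard deviations in [sigma/2, 3*sigma/2]. A small usage sketch (assuming util.py is on the path; not part of the commit):

import numpy as np
from util import _gen_low_rank_data

X = _gen_low_rank_data(dim=100, rank=5, n_samples=2000)

# The sample covariance spectrum mixes the unit-variance low-rank signal
# with the heteroscedastic noise floor added above.
evals = np.linalg.eigvalsh(np.cov(X, rowvar=False))
print(np.sort(evals)[::-1][:10])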
@@ -52,7 +63,7 @@ def plot_cov_ellipse(cov, pos, nstd=2, ax=None, **kwargs):
         sequence of [x0, y0].
     nstd : The radius of the ellipse in numbers of standard deviations.
         Defaults to 2 standard deviations.
-    ax : The axis that the ellipse will be plotted on. Defaults to the
+    ax : The axis that the ellipse will be plotted on. Defaults to the
         current axis.

     Additional keyword arguments are passed on to the ellipse patch.
@@ -77,37 +88,37 @@ def eigsorted(cov):

     ax.add_artist(ellip)
     return ellip
-def plot_density(model, x_range='auto', y_range='auto', n_grid=100,
-                 with_scatter=True, X=None, contour_options=None,
+
+def plot_density(model, x_range='auto', y_range='auto', n_grid=100,
+                 with_scatter=True, X=None, contour_options=None,
                  scatter_options=None, with_missing=False, X_miss=None):

     # Set default options
     if contour_options is None:
         contour_options = {'cmap' : plt.cm.plasma}
-    if scatter_options is None:
+    if scatter_options is None:
         scatter_options = {'color' : 'w', 'alpha' : 0.5, 'lw' : 0}
     scatter_options_miss = {'color' : 'r', 'alpha' : 0.5, 'lw' : 0}

     # Automatic x_range and y_range
     if x_range == 'auto' and X is not None:
         x_range = [X[:,0].min() - 2, X[:,0].max() + 2]
     if y_range == 'auto' and X is not None:
         y_range = [X[:,1].min() - 2, X[:,1].max() + 2]

     # Setup grid for contour plot
     x_vec = np.linspace(x_range[0], x_range[1], n_grid)
-    y_vec = np.linspace(y_range[0], y_range[1], n_grid)
-    x, y, = np.meshgrid(x_vec, y_vec)
-    X_grid = np.zeros([n_grid**2, 2])
+    y_vec = np.linspace(y_range[0], y_range[1], n_grid)
+    x, y, = np.meshgrid(x_vec, y_vec)
+    X_grid = np.zeros([n_grid**2, 2])
     X_grid[:,0] = x.reshape(n_grid**2)
     X_grid[:,1] = y.reshape(n_grid**2)

     # Get sample log-likelihood from model
     grid_ll = model.score_samples(X_grid)
     grid_prob = np.exp(grid_ll) # Convert to probability density
     grid_prob = grid_prob.reshape((n_grid, n_grid))

     # Plot contours
     plt.contourf(x, y, grid_prob, **contour_options)
@@ -118,13 +129,13 @@ def plot_density(model, x_range='auto', y_range='auto', n_grid=100,
         plt.scatter(X[~id_miss,0], X[~id_miss,1], **scatter_options)
         plt.scatter(X[id_miss,0], X[id_miss,1], **scatter_options_miss)
     else:
-        plt.scatter(X[:,0], X[:,1], **scatter_options) # data
+        plt.scatter(X[:,0], X[:,1], **scatter_options) # data

     # Plot options
     plt.xlim(x_range)
     plt.ylim(y_range)
     plt.xticks([])
     plt.yticks([])
-    plt.show()
+    plt.show()
     # fig = plt.gcf()
     # fig.set_size_inches(8, 8, forward=True)
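plot_density assumes two-dimensional data, which is why the plotting call stays commented out in the new high-dimensional demo. A minimal 2-D usage sketch (assuming pyMM and util are importable; not part of the commit):

from pyMM import GMM
from util import _generate_mixture_data, plot_density

X = _generate_mixture_data(2, 3, 500)  # 2-D data from a 3-component mixture
gmm = GMM(n_components=3)
gmm.fit(X, init_method='kmeans')
plot_density(gmm, X=X, n_grid=50)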
