Correct DML shapes for vectors #15

Merged · 2 commits · Apr 8, 2019
8 changes: 6 additions & 2 deletions econml/cate_estimator.py
@@ -145,9 +145,13 @@ def effect(self, T0, T1, X=None):
         # TODO: what if input is sparse? - there's no equivalent to einsum,
         # but tensordot can't be applied to this problem because we don't sum over m
         dT = T1 - T0
+        eff = self.const_marginal_effect(X)
+        einsum_str = 'myt,mt->my'
         if ndim(dT) == 1:
-            dT = reshape(dT, (-1, 1))
-        return np.einsum('myt,mt->my', self.const_marginal_effect(X), dT)
+            einsum_str = einsum_str.replace('t', '')
+        if ndim(eff) == ndim(dT):  # y is a vector, rather than a 2D array
+            einsum_str = einsum_str.replace('y', '')
+        return np.einsum(einsum_str, eff, dT)
 
     def marginal_effect(self, T, X=None):
         """
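The `einsum_str` dispatch is the heart of this fix: each axis label is dropped exactly when the corresponding input is a vector, so the output dimensionality mirrors the inputs. A standalone sketch tracing all four cases (the `apply_effect` helper is illustrative, not part of the diff):

```python
import numpy as np

def apply_effect(eff, dT):
    # mirrors the new logic above: drop 't' when T is a vector, 'y' when Y is
    einsum_str = 'myt,mt->my'
    if dT.ndim == 1:
        einsum_str = einsum_str.replace('t', '')
    if eff.ndim == dT.ndim:  # y is a vector, rather than a 2D array
        einsum_str = einsum_str.replace('y', '')
    return np.einsum(einsum_str, eff, dT)

m, d_y, d_t = 5, 2, 3
print(apply_effect(np.ones((m, d_y, d_t)), np.ones((m, d_t))).shape)  # (5, 2): 'myt,mt->my'
print(apply_effect(np.ones((m, d_y)), np.ones(m)).shape)              # (5, 2): 'my,m->my'
print(apply_effect(np.ones((m, d_t)), np.ones((m, d_t))).shape)       # (5,):   'mt,mt->m'
print(apply_effect(np.ones(m), np.ones(m)).shape)                     # (5,):   'm,m->m'
```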
65 changes: 26 additions & 39 deletions econml/dml.py
@@ -69,8 +69,9 @@ def fit(self, Y, T, X=None, W=None):
         T : array_like, shape (n, d_t)
             Treatment policy.
 
-        X : array-like, shape (n, d_x)
+        X : array-like, shape (n, d_x) or None (default=None)
             Feature vector that captures heterogeneity.
+            If X is None, then the featurizer will be ignored and X will be treated as a column of ones.
 
         W : array-like, shape (n, d_w) or None (default=None)
             High-dimensional controls.
@@ -81,14 +82,17 @@
 
         """
         if X is None:
-            X = np.empty((shape(Y)[0], 0))
+            X = np.ones((shape(Y)[0], 1))
+            phi_X = X
+        else:
+            phi_X = self._featurizer.fit_transform(X)
         if W is None:
             W = np.empty((shape(Y)[0], 0))
         assert shape(Y)[0] == shape(T)[0] == shape(X)[0] == shape(W)[0]
 
         # Handle case where Y or T is a vector instead of a 2-dimensional array
-        self._d_t = shape(T)[1] if ndim(T) == 2 else 1
-        self._d_y = shape(Y)[1] if ndim(Y) == 2 else 1
+        self._d_t = shape(T)[1:]
+        self._d_y = shape(Y)[1:]
 
         y_res = np.zeros(shape(Y))
         t_res = np.zeros(shape(T))
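Storing `shape(...)[1:]` rather than an integer is what makes the rest of the PR work: a vector contributes an empty tuple, so vector axes simply disappear from downstream reshape targets instead of being padded to size 1. A quick illustration in plain numpy (standalone, not part of the diff):

```python
import numpy as np

n = 20
d_t = np.shape(np.ones((n, 3)))[1:]  # (3,) -- T passed as a 2D array
d_y = np.shape(np.ones(n))[1:]       # ()   -- Y passed as a vector

# downstream reshapes concatenate these tuples, so vector axes vanish:
print((-1,) + d_t + d_y)                                 # (-1, 3)
print(np.zeros(n * 3).reshape((-1,) + d_t + d_y).shape)  # (20, 3)
```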
@@ -108,21 +112,9 @@
             self._models_y[idx].fit(hstack([X_train, W_train]), Y_train)
             y_res[test_idxs] = Y_test - self._models_y[idx].predict(hstack([X_test, W_test]))
 
-        phi_X = self._featurizer.fit_transform(X)
         self._d_phi = shape(phi_X)[1]
 
-        if phi_X.ndim == 2:
-            self._simple_features = True
-            self._model_final.fit(cross_product(phi_X, t_res), y_res)
-        else:
-            assert phi_X.ndim == 4  # m × d_phi × d_y × dₜ
-            assert shape(phi_X)[2] == shape(Y)[1] if ndim(Y) == 2 else 1
-            assert shape(phi_X)[3] == shape(T)[1] if ndim(T) == 2 else 1
-            self._simple_features = False
-            if ndim(T) == 1:
-                t_res = reshape(t_res, (-1, 1))  # reshape so that einsum indices are correct even in vector t case
-            self._model_final.fit(reshape(np.einsum('mt,mpyt->myp', t_res, phi_X),
-                                          (-1, self._d_phi)), reshape(y_res, (-1,)))
+        self._model_final.fit(cross_product(phi_X, t_res), y_res)
 
     def const_marginal_effect(self, X=None):
         """
@@ -134,7 +126,8 @@
         Parameters
         ----------
         X: optional (m × dₓ) matrix
-            Features for each sample
+            Features for each sample.
+            If X is None, it will be treated as a column of ones with a single row
 
         Returns
         -------
@@ -145,22 +138,22 @@
             (e.g. if both are vectors, then the output of this method will also be a vector)
         """
         if X is None:
-            X = np.empty((1, 0))
-        if self._simple_features:
-            # TODO: Doing this kronecker/reshaping/transposing stuff so that predict can be called
-            # rather than just using coef_ seems silly, but one benefit is that we can use linear models
-            # that don't expose a coef_ (e.g. a GridSearchCV over underlying linear models)
-            flat_eye = reshape(np.eye(self._d_t), (1, -1))
-            XT = reshape(np.kron(flat_eye, self._featurizer.fit_transform(X)),
-                         (self._d_t * shape(X)[0], -1))
-            effects = reshape(self._model_final.predict(XT), (-1, self._d_t, self._d_y))
+            phi_X = np.ones((1, 1))
+        else:
+            phi_X = self._featurizer.fit_transform(X)
+        # create an identity matrix of size d_t (or just a 1-element array if T was a vector)
+        eye = np.eye(self._d_t[0]) if self._d_t else np.array([1])
+        # TODO: Doing this kronecker/reshaping/transposing stuff so that predict can be called
+        # rather than just using coef_ seems silly, but one benefit is that we can use linear models
+        # that don't expose a coef_ (e.g. a GridSearchCV over underlying linear models)
+        flat_eye = reshape(eye, (1, -1))
+        XT = reshape(np.kron(flat_eye, phi_X),
+                     ((self._d_t[0] if self._d_t else 1) * shape(phi_X)[0], -1))
+        effects = reshape(self._model_final.predict(XT), (-1,) + self._d_t + self._d_y)
+        if self._d_t and self._d_y:
             return transpose(effects, (0, 2, 1))  # need to return as m by d_y by d_t matrix
-        else:
-            return reshape(self._model_final.predict(reshape(np.einsum('ts,mpyt->mysp',
-                                                                       np.eye(self._d_t),
-                                                                       self._featurizer.fit_transform(X)),
-                                                             (-1, self._d_phi))),
-                           (-1, self._d_y, self._d_t))
+        return effects
 
     @property
     def coef_(self):
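The Kronecker trick the TODO describes: to evaluate the constant marginal effect, stack a copy of phi(x) into each treatment's coefficient slot (one row per treatment per sample) and let the fitted final model predict. A sketch with a scalar outcome and illustrative shapes (random coefficients stand in for the fitted model, since only the shape mechanics matter here):

```python
import numpy as np

m, d_phi, d_t = 4, 3, 2
phi_X = np.random.normal(size=(m, d_phi))
coef = np.random.normal(size=d_t * d_phi)  # layout induced by cross_product during fit
predict = lambda A: A @ coef               # stand-in for self._model_final.predict

flat_eye = np.eye(d_t).reshape(1, -1)               # (1, d_t*d_t)
XT = np.kron(flat_eye, phi_X).reshape(d_t * m, -1)  # rows: [phi_x, 0], [0, phi_x], ...
effects = predict(XT).reshape(-1, d_t)              # (m, d_t): one effect per treatment
print(effects.shape)  # (4, 2)
```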
@@ -172,10 +165,7 @@ def coef_(self):
             (e.g. a `Pipeline` or `GridSearchCV` which wraps a linear model)
         """
         # TODO: handle case where final model doesn't directly expose coef_?
-        if self._simple_features:
-            return reshape(self._model_final.coef_, (self._d_y, self._d_t, self._d_phi))
-        else:
-            return self._model_final.coef_
+        return reshape(self._model_final.coef_, self._d_y + self._d_t + (self._d_phi,))
 
 
 class SparseLinearDMLCateEstimator(DMLCateEstimator):
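Under the tuple bookkeeping, `coef_` now collapses automatically for vector inputs; the four possible shapes, for reference:

```python
d_phi = 4
for d_y in [(2,), ()]:       # Y passed as a 2D array vs. a vector
    for d_t in [(3,), ()]:   # T passed as a 2D array vs. a vector
        print(d_y + d_t + (d_phi,))
# (2, 3, 4), (2, 4), (3, 4), (4,)
```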
@@ -218,9 +208,6 @@ def transform(XW):
             X = XW[:, :self._d_x]
             W = XW[:, self._d_x:]
             F = featurizer.fit_transform(X)
-            if ndim(F) == 4:
-                # flatten the matrix features so that we can properly perform the cross product
-                F = reshape(F, (shape(XW)[0], -1))
             return cross_product(XW, hstack([np.ones((shape(XW)[0], 1)), F, W]))
 
         model_y = Pipeline([("transform", FunctionTransformer(transform)), ("model", linear_model_y)])
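For context, the surrounding pipeline pattern: the nuisance models see the raw `[X, W]` block and featurize inside a `FunctionTransformer`. A runnable sketch under assumed names and dimensions (the identity featurizer and `Lasso` here are placeholders, not the estimator's actual defaults):

```python
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

d_x = 2

def transform(XW):
    X, W = XW[:, :d_x], XW[:, d_x:]
    F = X  # placeholder for featurizer.fit_transform(X)
    right = np.hstack([np.ones((XW.shape[0], 1)), F, W])
    # cross every raw column with [1, features, controls]
    return (XW[:, :, None] * right[:, None, :]).reshape(XW.shape[0], -1)

model_y = Pipeline([("transform", FunctionTransformer(transform)), ("model", Lasso())])
XW = np.random.normal(size=(10, 5))
model_y.fit(XW, np.random.normal(size=10))
```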
89 changes: 18 additions & 71 deletions econml/tests/test_dml.py
@@ -4,34 +4,14 @@
 import unittest
 import pytest
 from sklearn.base import TransformerMixin
-from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import LinearRegression, Lasso
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
 from econml.dml import DMLCateEstimator, SparseLinearDMLCateEstimator
 import numpy as np
 from econml.utilities import shape, hstack, vstack, reshape, cross_product
 
 
-class MatrixFeatures(TransformerMixin):
-    """Featurizer that produces each one-entry matrix of size d_y by d_t for each feature in X (plus a constant)."""
-
-    def __init__(self, d_y, d_t):
-        self._d_y = d_y
-        self._d_t = d_t
-        self._fts = np.eye(d_y * d_t).reshape(d_y * d_t, d_y, d_t)
-
-    def fit(self, X):
-        return self
-
-    def transform(self, X):
-        # add column of ones to X
-        X = hstack([np.ones((shape(X)[0], 1)), X])
-        d_x = shape(X)[1]
-        d_y, d_t = self._d_y, self._d_t
-        # for each row, create the d_y*d_t*(d_x+1) features (which are matrices of size d_y by d_t)
-        return reshape(np.einsum('nx,fyt->nfxyt', X, self._fts), (shape(X)[0], d_y * d_t * d_x, d_y, d_t))
-
-
 # all solutions to underdetermined (or exactly determined) Ax=b are given by A⁺b+(I-A⁺A)w for some arbitrary w
 # note that if Ax=b is overdetermined, this will raise an assertion error
 def rand_sol(A, b):
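The comment above `rand_sol` states a standard pseudoinverse fact: the solution set of Ax = b is {A⁺b + (I − A⁺A)w}. A quick numerical check (standalone, not part of the test suite; relies on the random A having full row rank, which holds almost surely here):

```python
import numpy as np

A = np.random.normal(size=(3, 5))  # underdetermined: 3 equations, 5 unknowns
b = np.random.normal(size=3)
w = np.random.normal(size=5)       # arbitrary
x = np.linalg.pinv(A) @ b + (np.eye(5) - np.linalg.pinv(A) @ A) @ w
np.testing.assert_allclose(A @ x, b)  # any such x solves Ax = b
```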
@@ -46,26 +26,22 @@ class TestDML(unittest.TestCase):
 
     def test_cate_api(self):
         """Test that we correctly implement the CATE API."""
-        d_w = 2
-        d_x = 2
-        d_y = 2
-        d_t = 2
-        n = 20
-        with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t):
-            W, X, Y, T = [np.random.normal(size=(n, d)) for d in [d_w, d_x, d_y, d_t]]
-
-            dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())
-            dml.fit(Y, T, X, W)
-            # just make sure we can call the marginal_effect and effect methods
-            dml.marginal_effect(None, X)
-            dml.effect(0, T, X)
-
-            dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression(),
-                                   featurizer=MatrixFeatures(d_y, d_t))
-            dml.fit(Y, T, X, W)
-            # just make sure we can call the marginal_effect and effect methods
-            dml.marginal_effect(None, X)
-            dml.effect(0, T, X)
+        for d_t in [2, -1]:
+            for d_y in [2, -1]:
+                for d_x in [2, None]:
+                    for d_w in [2, None]:
+                        n = 20
+                        with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t):
+                            W, X, Y, T = [np.random.normal(size=(n, d)) if (d and d >= 0)
+                                          else np.random.normal(size=(n,)) if (d and d < 0)
+                                          else None
+                                          for d in [d_w, d_x, d_y, d_t]]
+
+                            dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())
+                            dml.fit(Y, T, X, W)
+                            # just make sure we can call the marginal_effect and effect methods
+                            dml.marginal_effect(None, X)
+                            dml.effect(0, T, X)
 
     def test_can_use_vectors(self):
         """Test that we can pass vectors for T and Y (not only 2-dimensional arrays)."""
@@ -148,34 +124,5 @@ def _test_sparse(n_p, d_w, n_r):
         # note that this would fail for the non-sparse DMLCateEstimator
 
         np.testing.assert_allclose(a, dml.coef_.reshape(-1))
-        eff = reshape(t * np.choose(np.tile(p, 2), a), (-1, 1))
+        eff = reshape(t * np.choose(np.tile(p, 2), a), (-1,))
         np.testing.assert_allclose(eff, dml.effect(0, t, x))
-
-        dml = SparseLinearDMLCateEstimator(LinearRegression(fit_intercept=False),
-                                           LinearRegression(fit_intercept=False),
-                                           featurizer=Pipeline([("id", FunctionTransformer()),
-                                                                ("matrix", MatrixFeatures(1, 1))]))
-        dml.fit(y, t, x, w)
-        np.testing.assert_allclose(eff, dml.effect(0, t, x))
-
-    def test_complex_features(self):
-        # recover simple features by initializing complex features appropriately
-        for _ in range(10):
-            d_w = np.random.randint(0, 4)
-            d_x = np.random.randint(1, 3)
-            d_y = np.random.randint(1, 3)
-            d_t = np.random.randint(1, 3)
-            n = 20
-            with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t):
-                W, X, Y, T = [np.random.normal(size=(n, d)) for d in [d_w, d_x, d_y, d_t]]
-                # using full set of matrix features should be equivalent to using non-matrix featurizer
-                dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression(),
-                                       featurizer=MatrixFeatures(d_y, d_t))
-                dml.fit(Y, T, X, W)
-                coef1 = dml.coef_
-
-                dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())
-                dml.fit(Y, T, X, W)
-                coef2 = dml.coef_
-
-                np.testing.assert_allclose(coef1, reshape(coef2, -1))