Correct DML shapes for vectors #15

Merged · 2 commits · Apr 8, 2019
8 changes: 6 additions & 2 deletions econml/cate_estimator.py
@@ -145,9 +145,13 @@ def effect(self, T0, T1, X=None):
         # TODO: what if input is sparse? - there's no equivalent to einsum,
         # but tensordot can't be applied to this problem because we don't sum over m
         dT = T1 - T0
+        eff = self.const_marginal_effect(X)
+        einsum_str = 'myt,mt->my'
         if ndim(dT) == 1:
-            dT = reshape(dT, (-1, 1))
-        return np.einsum('myt,mt->my', self.const_marginal_effect(X), dT)
+            einsum_str = einsum_str.replace('t', '')
+        if ndim(eff) == ndim(dT):  # y is a vector, rather than a 2D array
+            einsum_str = einsum_str.replace('y', '')
+        return np.einsum(einsum_str, eff, dT)
 
     def marginal_effect(self, T, X=None):
         """
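The `einsum_str` dispatch is the heart of this fix: each axis label is dropped exactly when the corresponding input is a vector, so the output dimensionality mirrors the inputs. A standalone sketch tracing all four cases (the `apply_effect` helper is illustrative, not part of the diff):

```python
import numpy as np

def apply_effect(eff, dT):
    # mirrors the new logic above: drop 't' when T is a vector, 'y' when Y is
    einsum_str = 'myt,mt->my'
    if dT.ndim == 1:
        einsum_str = einsum_str.replace('t', '')
    if eff.ndim == dT.ndim:  # y is a vector, rather than a 2D array
        einsum_str = einsum_str.replace('y', '')
    return np.einsum(einsum_str, eff, dT)

m, d_y, d_t = 5, 2, 3
print(apply_effect(np.ones((m, d_y, d_t)), np.ones((m, d_t))).shape)  # (5, 2): 'myt,mt->my'
print(apply_effect(np.ones((m, d_y)), np.ones(m)).shape)              # (5, 2): 'my,m->my'
print(apply_effect(np.ones((m, d_t)), np.ones((m, d_t))).shape)       # (5,):   'mt,mt->m'
print(apply_effect(np.ones(m), np.ones(m)).shape)                     # (5,):   'm,m->m'
```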
65 changes: 26 additions & 39 deletions econml/dml.py
@@ -69,8 +69,9 @@ def fit(self, Y, T, X=None, W=None):
         T : array_like, shape (n, d_t)
             Treatment policy.
 
-        X : array-like, shape (n, d_x)
+        X : array-like, shape (n, d_x) or None (default=None)
             Feature vector that captures heterogeneity.
+            If X is None, then the featurizer will be ignored and X will be treated as a column of ones.
 
         W : array-like, shape (n, d_w) or None (default=None)
             High-dimensional controls.
@@ -81,14 +82,17 @@
 
         """
         if X is None:
-            X = np.empty((shape(Y)[0], 0))
+            X = np.ones((shape(Y)[0], 1))
+            phi_X = X
+        else:
+            phi_X = self._featurizer.fit_transform(X)
         if W is None:
             W = np.empty((shape(Y)[0], 0))
         assert shape(Y)[0] == shape(T)[0] == shape(X)[0] == shape(W)[0]
 
         # Handle case where Y or T is a vector instead of a 2-dimensional array
-        self._d_t = shape(T)[1] if ndim(T) == 2 else 1
-        self._d_y = shape(Y)[1] if ndim(Y) == 2 else 1
+        self._d_t = shape(T)[1:]
+        self._d_y = shape(Y)[1:]
 
         y_res = np.zeros(shape(Y))
         t_res = np.zeros(shape(T))
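Storing `shape(...)[1:]` rather than an integer is what makes the rest of the PR work: a vector contributes an empty tuple, so vector axes simply disappear from downstream reshape targets instead of being padded to size 1. A quick illustration in plain numpy (standalone, not part of the diff):

```python
import numpy as np

n = 20
d_t = np.shape(np.ones((n, 3)))[1:]  # (3,) -- T passed as a 2D array
d_y = np.shape(np.ones(n))[1:]       # ()   -- Y passed as a vector

# downstream reshapes concatenate these tuples, so vector axes vanish:
print((-1,) + d_t + d_y)                                 # (-1, 3)
print(np.zeros(n * 3).reshape((-1,) + d_t + d_y).shape)  # (20, 3)
```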
@@ -108,21 +112,9 @@
             self._models_y[idx].fit(hstack([X_train, W_train]), Y_train)
             y_res[test_idxs] = Y_test - self._models_y[idx].predict(hstack([X_test, W_test]))
 
-        phi_X = self._featurizer.fit_transform(X)
         self._d_phi = shape(phi_X)[1]
 
-        if phi_X.ndim == 2:
-            self._simple_features = True
-            self._model_final.fit(cross_product(phi_X, t_res), y_res)
-        else:
-            assert phi_X.ndim == 4  # m × d_phi × d_y × dₜ
-            assert shape(phi_X)[2] == shape(Y)[1] if ndim(Y) == 2 else 1
-            assert shape(phi_X)[3] == shape(T)[1] if ndim(T) == 2 else 1
-            self._simple_features = False
-            if ndim(T) == 1:
-                t_res = reshape(t_res, (-1, 1))  # reshape so that einsum indices are correct even in vector t case
-            self._model_final.fit(reshape(np.einsum('mt,mpyt->myp', t_res, phi_X),
-                                          (-1, self._d_phi)), reshape(y_res, (-1,)))
+        self._model_final.fit(cross_product(phi_X, t_res), y_res)
 
     def const_marginal_effect(self, X=None):
         """
@@ -134,7 +126,8 @@
         Parameters
         ----------
         X: optional (m × dₓ) matrix
-            Features for each sample
+            Features for each sample.
+            If X is None, it will be treated as a column of ones with a single row
 
         Returns
         -------
@@ -145,22 +138,22 @@
             (e.g. if both are vectors, then the output of this method will also be a vector)
         """
         if X is None:
-            X = np.empty((1, 0))
-        if self._simple_features:
-            # TODO: Doing this kronecker/reshaping/transposing stuff so that predict can be called
-            # rather than just using coef_ seems silly, but one benefit is that we can use linear models
-            # that don't expose a coef_ (e.g. a GridSearchCV over underlying linear models)
-            flat_eye = reshape(np.eye(self._d_t), (1, -1))
-            XT = reshape(np.kron(flat_eye, self._featurizer.fit_transform(X)),
-                         (self._d_t * shape(X)[0], -1))
-            effects = reshape(self._model_final.predict(XT), (-1, self._d_t, self._d_y))
+            phi_X = np.ones((1, 1))
+        else:
+            phi_X = self._featurizer.fit_transform(X)
+        # create an identity matrix of size d_t (or just a 1-element array if T was a vector)
+        eye = np.eye(self._d_t[0]) if self._d_t else np.array([1])
+        # TODO: Doing this kronecker/reshaping/transposing stuff so that predict can be called
+        # rather than just using coef_ seems silly, but one benefit is that we can use linear models
+        # that don't expose a coef_ (e.g. a GridSearchCV over underlying linear models)
+        flat_eye = reshape(eye, (1, -1))
+        XT = reshape(np.kron(flat_eye, phi_X),
+                     ((self._d_t[0] if self._d_t else 1) * shape(phi_X)[0], -1))
+        effects = reshape(self._model_final.predict(XT), (-1,) + self._d_t + self._d_y)
+        if self._d_t and self._d_y:
             return transpose(effects, (0, 2, 1))  # need to return as m by d_y by d_t matrix
-        else:
-            return reshape(self._model_final.predict(reshape(np.einsum('ts,mpyt->mysp',
-                                                                       np.eye(self._d_t),
-                                                                       self._featurizer.fit_transform(X)),
-                                                             (-1, self._d_phi))),
-                           (-1, self._d_y, self._d_t))
+        return effects
 
     @property
     def coef_(self):
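The Kronecker trick the TODO describes: to evaluate the constant marginal effect, stack a copy of phi(x) into each treatment's coefficient slot (one row per treatment per sample) and let the fitted final model predict. A sketch with a scalar outcome and illustrative shapes (random coefficients stand in for the fitted model, since only the shape mechanics matter here):

```python
import numpy as np

m, d_phi, d_t = 4, 3, 2
phi_X = np.random.normal(size=(m, d_phi))
coef = np.random.normal(size=d_t * d_phi)  # layout induced by cross_product during fit
predict = lambda A: A @ coef               # stand-in for self._model_final.predict

flat_eye = np.eye(d_t).reshape(1, -1)               # (1, d_t*d_t)
XT = np.kron(flat_eye, phi_X).reshape(d_t * m, -1)  # rows: [phi_x, 0], [0, phi_x], ...
effects = predict(XT).reshape(-1, d_t)              # (m, d_t): one effect per treatment
print(effects.shape)  # (4, 2)
```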
@@ -172,10 +165,7 @@ def coef_(self):
             (e.g. a `Pipeline` or `GridSearchCV` which wraps a linear model)
         """
         # TODO: handle case where final model doesn't directly expose coef_?
-        if self._simple_features:
-            return reshape(self._model_final.coef_, (self._d_y, self._d_t, self._d_phi))
-        else:
-            return self._model_final.coef_
+        return reshape(self._model_final.coef_, self._d_y + self._d_t + (self._d_phi,))
 
 
 class SparseLinearDMLCateEstimator(DMLCateEstimator):
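Under the tuple bookkeeping, `coef_` now collapses automatically for vector inputs; the four possible shapes, for reference:

```python
d_phi = 4
for d_y in [(2,), ()]:       # Y passed as a 2D array vs. a vector
    for d_t in [(3,), ()]:   # T passed as a 2D array vs. a vector
        print(d_y + d_t + (d_phi,))
# (2, 3, 4), (2, 4), (3, 4), (4,)
```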
@@ -218,9 +208,6 @@ def transform(XW):
             X = XW[:, :self._d_x]
             W = XW[:, self._d_x:]
             F = featurizer.fit_transform(X)
-            if ndim(F) == 4:
-                # flatten the matrix features so that we can properly perform the cross product
-                F = reshape(F, (shape(XW)[0], -1))
             return cross_product(XW, hstack([np.ones((shape(XW)[0], 1)), F, W]))
 
         model_y = Pipeline([("transform", FunctionTransformer(transform)), ("model", linear_model_y)])
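For context, the surrounding pipeline pattern: the nuisance models see the raw `[X, W]` block and featurize inside a `FunctionTransformer`. A runnable sketch under assumed names and dimensions (the identity featurizer and `Lasso` here are placeholders, not the estimator's actual defaults):

```python
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

d_x = 2

def transform(XW):
    X, W = XW[:, :d_x], XW[:, d_x:]
    F = X  # placeholder for featurizer.fit_transform(X)
    right = np.hstack([np.ones((XW.shape[0], 1)), F, W])
    # cross every raw column with [1, features, controls]
    return (XW[:, :, None] * right[:, None, :]).reshape(XW.shape[0], -1)

model_y = Pipeline([("transform", FunctionTransformer(transform)), ("model", Lasso())])
XW = np.random.normal(size=(10, 5))
model_y.fit(XW, np.random.normal(size=10))
```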
89 changes: 18 additions & 71 deletions econml/tests/test_dml.py
@@ -4,34 +4,14 @@
 import unittest
 import pytest
 from sklearn.base import TransformerMixin
-from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import LinearRegression, Lasso
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
 from econml.dml import DMLCateEstimator, SparseLinearDMLCateEstimator
 import numpy as np
 from econml.utilities import shape, hstack, vstack, reshape, cross_product
 
 
-class MatrixFeatures(TransformerMixin):
-    """Featurizer that produces each one-entry matrix of size d_y by d_t for each feature in X (plus a constant)."""
-
-    def __init__(self, d_y, d_t):
-        self._d_y = d_y
-        self._d_t = d_t
-        self._fts = np.eye(d_y * d_t).reshape(d_y * d_t, d_y, d_t)
-
-    def fit(self, X):
-        return self
-
-    def transform(self, X):
-        # add column of ones to X
-        X = hstack([np.ones((shape(X)[0], 1)), X])
-        d_x = shape(X)[1]
-        d_y, d_t = self._d_y, self._d_t
-        # for each row, create the d_y*d_t*(d_x+1) features (which are matrices of size d_y by d_t)
-        return reshape(np.einsum('nx,fyt->nfxyt', X, self._fts), (shape(X)[0], d_y * d_t * d_x, d_y, d_t))
-
-
 # all solutions to underdetermined (or exactly determined) Ax=b are given by A⁺b+(I-A⁺A)w for some arbitrary w
 # note that if Ax=b is overdetermined, this will raise an assertion error
 def rand_sol(A, b):
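The comment above `rand_sol` states a standard pseudoinverse fact: the solution set of Ax = b is {A⁺b + (I − A⁺A)w}. A quick numerical check (standalone, not part of the test suite; relies on the random A having full row rank, which holds almost surely here):

```python
import numpy as np

A = np.random.normal(size=(3, 5))  # underdetermined: 3 equations, 5 unknowns
b = np.random.normal(size=3)
w = np.random.normal(size=5)       # arbitrary
x = np.linalg.pinv(A) @ b + (np.eye(5) - np.linalg.pinv(A) @ A) @ w
np.testing.assert_allclose(A @ x, b)  # any such x solves Ax = b
```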
@@ -46,26 +26,22 @@ class TestDML(unittest.TestCase):
 
     def test_cate_api(self):
         """Test that we correctly implement the CATE API."""
-        d_w = 2
-        d_x = 2
-        d_y = 2
-        d_t = 2
-        n = 20
-        with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t):
-            W, X, Y, T = [np.random.normal(size=(n, d)) for d in [d_w, d_x, d_y, d_t]]
-
-            dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())
-            dml.fit(Y, T, X, W)
-            # just make sure we can call the marginal_effect and effect methods
-            dml.marginal_effect(None, X)
-            dml.effect(0, T, X)
-
-            dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression(),
-                                   featurizer=MatrixFeatures(d_y, d_t))
-            dml.fit(Y, T, X, W)
-            # just make sure we can call the marginal_effect and effect methods
-            dml.marginal_effect(None, X)
-            dml.effect(0, T, X)
+        for d_t in [2, -1]:
+            for d_y in [2, -1]:
+                for d_x in [2, None]:
+                    for d_w in [2, None]:
+                        n = 20
+                        with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t):
+                            W, X, Y, T = [np.random.normal(size=(n, d)) if (d and d >= 0)
+                                          else np.random.normal(size=(n,)) if (d and d < 0)
+                                          else None
+                                          for d in [d_w, d_x, d_y, d_t]]
+
+                            dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())
+                            dml.fit(Y, T, X, W)
+                            # just make sure we can call the marginal_effect and effect methods
+                            dml.marginal_effect(None, X)
+                            dml.effect(0, T, X)
 
     def test_can_use_vectors(self):
         """Test that we can pass vectors for T and Y (not only 2-dimensional arrays)."""
@@ -148,34 +124,5 @@ def _test_sparse(n_p, d_w, n_r):
         # note that this would fail for the non-sparse DMLCateEstimator
 
         np.testing.assert_allclose(a, dml.coef_.reshape(-1))
-        eff = reshape(t * np.choose(np.tile(p, 2), a), (-1, 1))
+        eff = reshape(t * np.choose(np.tile(p, 2), a), (-1,))
         np.testing.assert_allclose(eff, dml.effect(0, t, x))
-
-        dml = SparseLinearDMLCateEstimator(LinearRegression(fit_intercept=False),
-                                           LinearRegression(fit_intercept=False),
-                                           featurizer=Pipeline([("id", FunctionTransformer()),
-                                                                ("matrix", MatrixFeatures(1, 1))]))
-        dml.fit(y, t, x, w)
-        np.testing.assert_allclose(eff, dml.effect(0, t, x))
-
-    def test_complex_features(self):
-        # recover simple features by initializing complex features appropriately
-        for _ in range(10):
-            d_w = np.random.randint(0, 4)
-            d_x = np.random.randint(1, 3)
-            d_y = np.random.randint(1, 3)
-            d_t = np.random.randint(1, 3)
-            n = 20
-            with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t):
-                W, X, Y, T = [np.random.normal(size=(n, d)) for d in [d_w, d_x, d_y, d_t]]
-                # using full set of matrix features should be equivalent to using non-matrix featurizer
-                dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression(),
-                                       featurizer=MatrixFeatures(d_y, d_t))
-                dml.fit(Y, T, X, W)
-                coef1 = dml.coef_
-
-                dml = DMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())
-                dml.fit(Y, T, X, W)
-                coef2 = dml.coef_
-
-                np.testing.assert_allclose(coef1, reshape(coef2, -1))