Skip to content

Commit

Permalink
Add support for discrete treatments to DML
Browse files Browse the repository at this point in the history
  • Loading branch information
kbattocchi committed Apr 11, 2019
1 parent 52c8f5d commit 614411f
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 10 deletions.
60 changes: 51 additions & 9 deletions econml/dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import numpy as np
import copy
from .utilities import shape, reshape, ndim, hstack, cross_product, transpose
from sklearn.model_selection import KFold
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.preprocessing import PolynomialFeatures, FunctionTransformer
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder, OneHotEncoder
from sklearn.base import clone, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.utils import check_random_state
Expand Down Expand Up @@ -53,11 +53,12 @@ class _RLearner(LinearCateEstimator):
"""

def __init__(self, model_y, model_t, model_final,
n_splits, random_state):
discrete_treatment, n_splits, random_state):
self._models_y = [clone(model_y, safe=False) for _ in range(n_splits)]
self._models_t = [clone(model_t, safe=False) for _ in range(n_splits)]
self._model_final = clone(model_final, safe=False)
self._n_splits = n_splits
self._discrete_treatment = discrete_treatment
self._random_state = check_random_state(random_state)

def fit(self, Y, T, X=None, W=None):
Expand All @@ -67,19 +68,33 @@ def fit(self, Y, T, X=None, W=None):
W = np.empty((shape(Y)[0], 0))
assert shape(Y)[0] == shape(T)[0] == shape(X)[0] == shape(W)[0]

folds = KFold(self._n_splits, shuffle=True, random_state=self._random_state).split(X)
Y_res = np.zeros_like(Y)
T_res = np.zeros_like(T)
if self._discrete_treatment:
folds = StratifiedKFold(self._n_splits, shuffle=True,
random_state=self._random_state).split(np.empty_like(X), T)
self._label_encoder = LabelEncoder()
self._one_hot_encoder = OneHotEncoder(categories='auto', sparse=False)
T = self._label_encoder.fit_transform(T)
T_out = self._one_hot_encoder.fit_transform(reshape(T, (-1, 1)))
T_out = T_out[:, 1:] # drop first column since all columns sum to one
else:
folds = KFold(self._n_splits, shuffle=True, random_state=self._random_state).split(X)
T_out = T

Y_res = np.zeros(shape(Y))
T_res = np.zeros(shape(T_out))
for idx, (train_idxs, test_idxs) in enumerate(folds):
Y_train, Y_test = Y[train_idxs], Y[test_idxs]
T_train, T_test = T[train_idxs], T[test_idxs]
T_train, T_test = T[train_idxs], T_out[test_idxs]
X_train, X_test = X[train_idxs], X[test_idxs]
W_train, W_test = W[train_idxs], W[test_idxs]
# TODO: If T is a vector rather than a 2-D array, then the model's fit must accept a vector...
# Do we want to reshape to an nx1, or just trust the user's choice of input?
# (Likewise for Y below)
self._models_t[idx].fit(X_train, W_train, T_train)
T_res[test_idxs] = T_test - self._models_t[idx].predict(X_test, W_test)
if self._discrete_treatment:
T_res[test_idxs] = T_test - self._models_t[idx].predict(X_test, W_test)[:, 1:]
else:
T_res[test_idxs] = T_test - self._models_t[idx].predict(X_test, W_test)
self._models_y[idx].fit(X_train, W_train, Y_train)
Y_res[test_idxs] = Y_test - self._models_y[idx].predict(X_test, W_test)

Expand Down Expand Up @@ -110,6 +125,15 @@ def const_marginal_effect(self, X=None):
X = np.ones((1, 1))
return self._model_final.predict(X)

# need to override effect in case of discrete treatments
# TODO: should this logic be moved up to the LinearCateEstimator class and
# removed from here and from the OrthoForest implementation?
def effect(self, T0, T1, X):
if self._discrete_treatment:
T0 = self._one_hot_encoder.transform(reshape(self._label_encoder.transform(T0), (-1, 1)))[:, 1:]
T1 = self._one_hot_encoder.transform(reshape(self._label_encoder.transform(T1), (-1, 1)))[:, 1:]
return super().effect(T0, T1, X)


class _DMLCateEstimatorBase(_RLearner):
"""
Expand All @@ -136,6 +160,9 @@ class _DMLCateEstimatorBase(_RLearner):
sparseLinear: bool
Whether to use sparse linear model assumptions
discrete_treatment: bool
Whether the treatment values should be treated as categorical, rather than continuous, quantities
n_splits: int
The number of splits to use when fitting the first-stage models.
Expand All @@ -150,6 +177,7 @@ def __init__(self,
model_y, model_t, model_final,
featurizer,
sparseLinear,
discrete_treatment,
n_splits,
random_state):

Expand All @@ -171,7 +199,10 @@ def fit(self, X, W, Target):
self._model.fit(self._combine(X, W), Target)

def predict(self, X, W):
return self._model.predict(self._combine(X, W))
if (not self._is_Y) and discrete_treatment:
return self._model.predict_proba(self._combine(X, W))
else:
return self._model.predict(self._combine(X, W))

class FinalWrapper:
def __init__(self):
Expand Down Expand Up @@ -209,6 +240,7 @@ def coef_(self):
super().__init__(model_y=FirstStageWrapper(model_y, is_Y=True),
model_t=FirstStageWrapper(model_t, is_Y=False),
model_final=FinalWrapper(),
discrete_treatment=discrete_treatment,
n_splits=n_splits,
random_state=random_state)

Expand Down Expand Up @@ -246,6 +278,9 @@ class DMLCateEstimator(_DMLCateEstimatorBase):
The transformer used to featurize the raw features when fitting the final model. Must implement
a `fit_transform` method.
discrete_treatment: bool, optional (default is False)
Whether the treatment values should be treated as categorical, rather than continuous, quantities
n_splits: int, optional (default is 2)
The number of splits to use when fitting the first-stage models.
Expand All @@ -259,13 +294,15 @@ class DMLCateEstimator(_DMLCateEstimatorBase):
def __init__(self,
model_y, model_t, model_final=LinearRegression(fit_intercept=False),
featurizer=PolynomialFeatures(degree=1, include_bias=True),
discrete_treatment=False,
n_splits=2,
random_state=None):
super().__init__(model_y=model_y,
model_t=model_t,
model_final=model_final,
featurizer=featurizer,
sparseLinear=False,
discrete_treatment=discrete_treatment,
n_splits=n_splits,
random_state=random_state)

Expand Down Expand Up @@ -296,6 +333,9 @@ class SparseLinearDMLCateEstimator(_DMLCateEstimatorBase):
The transformer used to featurize the raw features when fitting the final model. Must implement
a `fit_transform` method.
discrete_treatment: bool, optional (default is False)
Whether the treatment values should be treated as categorical, rather than continuous, quantities
n_splits: int, optional (default is 2)
The number of splits to use when fitting the first-stage models.
Expand All @@ -309,13 +349,15 @@ class SparseLinearDMLCateEstimator(_DMLCateEstimatorBase):
def __init__(self,
linear_model_y=LassoCV(), linear_model_t=LassoCV(), model_final=LinearRegression(fit_intercept=False),
featurizer=PolynomialFeatures(degree=1, include_bias=True),
discrete_treatment=False,
n_splits=2,
random_state=None):
super().__init__(model_y=linear_model_y,
model_t=linear_model_t,
model_final=model_final,
featurizer=featurizer,
sparseLinear=True,
discrete_treatment=discrete_treatment,
n_splits=n_splits,
random_state=random_state)

Expand Down
19 changes: 18 additions & 1 deletion econml/tests/test_dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import unittest
import pytest
from sklearn.base import TransformerMixin
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
from econml.dml import DMLCateEstimator, SparseLinearDMLCateEstimator, KernelDMLCateEstimator
Expand Down Expand Up @@ -52,6 +52,23 @@ def test_can_use_vectors(self):
dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
self.assertAlmostEqual(dml.coef_.reshape(())[()], 1)

def test_discrete_treatments(self):
"""Test that we can use discrete treatments"""
dml = DMLCateEstimator(LinearRegression(), LogisticRegression(C=1000),
featurizer=FunctionTransformer(), discrete_treatment=True)
# create a simple artificial setup where effect of moving from treatment
# 1 -> 2 is 2,
# 1 -> 3 is 1, and
# 2 -> 3 is -1 (necessarily, by composing the previous two effects)
# Using an uneven number of examples from different classes,
# and having the treatments in non-lexicographic order,
# Should rule out some basic issues.
dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
np.testing.assert_almost_equal(dml.effect(np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]),
np.ones((9, 1))),
[0, 2, 1, -2, 0, -1, -1, 1, 0])

@staticmethod
def _generate_recoverable_errors(a_X, X, a_W=None, W=None, featurizer=FunctionTransformer()):
"""Return error vectors e_t and e_y such that OLS can recover the true coefficients from both stages."""
Expand Down

0 comments on commit 614411f

Please sign in to comment.