
Commit d2118d5

add double ml notebook and fix a minor typo in dml doc (#24)
* add double ml notebook and fix a minor typo in dml doc
* change the plot for cross price elasticities
* add a bootstrap CI for OJ data
* fix plot legend typo
* change dml shuffle True and update notebook based on all feedbacks
* Added random state to metalearner tests.
1 parent 5f2b5d6 commit d2118d5

4 files changed

Lines changed: 997 additions & 6 deletions


doc/spec/estimation/dml.rst

Lines changed: 1 addition & 1 deletion
@@ -116,7 +116,7 @@ One particularly attractive special case of the DML framework is the case when :
         f_i(X, W) =~& \ldot{\gamma_i}{(X; W)}\\
     \end{align}
 
-In this case we have a more structural form for the two regression tasks of estimating :math:`q` and :math:`p`. In particular, we can write:
+In this case we have a more structural form for the two regression tasks of estimating :math:`q` and :math:`f`. In particular, we can write:
 
 .. math::
     :nowrap:
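
For context on the one-character fix above: in the DML notation used by this doc, :math:`q` and :math:`f` are the two nuisance regressions, the outcome model and the treatment model. A hedged sketch of that notation, assuming the standard formulation in the surrounding dml.rst (not quoted in this hunk):

\begin{align}
    q(X, W) =~& \mathbb{E}[Y \mid X, W] \\
    f(X, W) =~& \mathbb{E}[T \mid X, W]
\end{align}

Under that reading the sentence is about the outcome regression :math:`q` and the treatment regression :math:`f`, which is why it now refers to :math:`f` rather than :math:`p`.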

econml/dml.py

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ def fit(self, Y, T, X=None, W=None):
 
         y_res = np.zeros(shape(Y))
         t_res = np.zeros(shape(T))
-        for idx, (train_idxs, test_idxs) in enumerate(KFold(self._n_splits).split(X)):
+        for idx, (train_idxs, test_idxs) in enumerate(KFold(self._n_splits, shuffle=True).split(X)):
             Y_train, Y_test = Y[train_idxs], Y[test_idxs]
             T_train, T_test = T[train_idxs], T[test_idxs]
             X_train, X_test = X[train_idxs], X[test_idxs]
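
To make the effect of this one-line change concrete, here is a minimal, self-contained sketch of a cross-fitting residualization loop of the same shape as the one in fit above. The variable names and the use of LinearRegression as the nuisance model are illustrative assumptions, not econml's actual internals; the point is only that shuffle=True randomizes fold membership instead of splitting rows into contiguous blocks, which matters when the input data happens to be ordered (e.g. by time or by treatment group).

import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
n, d = 200, 3
X = rng.normal(size=(n, d))                             # controls
T = X[:, 0] + rng.normal(scale=0.5, size=n)             # treatment depends on X
Y = 2.0 * T + X[:, 1] + rng.normal(scale=0.5, size=n)   # true effect is 2.0

y_res = np.zeros(Y.shape)
t_res = np.zeros(T.shape)

# shuffle=True (the change in this hunk) draws random folds; shuffle=False
# would use contiguous index blocks, which is fragile for sorted data.
for train_idxs, test_idxs in KFold(n_splits=2, shuffle=True, random_state=123).split(X):
    y_res[test_idxs] = Y[test_idxs] - LinearRegression().fit(X[train_idxs], Y[train_idxs]).predict(X[test_idxs])
    t_res[test_idxs] = T[test_idxs] - LinearRegression().fit(X[train_idxs], T[train_idxs]).predict(X[test_idxs])

# Final stage: regress outcome residuals on treatment residuals.
theta = t_res.dot(y_res) / t_res.dot(t_res)
print(theta)  # close to the true effect of 2.0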

econml/tests/test_metalearners.py

Lines changed: 6 additions & 4 deletions
@@ -15,6 +15,8 @@ class TestMetalearners(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
+        # Set random seed
+        cls.random_state = np.random.RandomState(12345)
         # Generate data
         # DGP constants
         cls.d = 5
@@ -23,7 +25,7 @@ def setUpClass(cls):
         cls.beta = np.array([0.25, -0.38, 1.41, 0.50, -1.22])
         cls.heterogeneity_index = 1
         # Test data
-        cls.X_test = multivariate_normal(
+        cls.X_test = cls.random_state.multivariate_normal(
             np.zeros(cls.d),
             np.diag(np.ones(cls.d)),
             cls.n_test)
@@ -162,7 +164,7 @@ def _test_inputs(self, learner_instance):
 
     @classmethod
     def _untreated_outcome(cls, x):
-        return np.dot(x, cls.beta) + normal(0, 1)
+        return np.dot(x, cls.beta) + cls.random_state.normal(0, 1)
 
     @classmethod
     def _const_te(cls, x):
@@ -185,9 +187,9 @@ def _generate_data(cls, n, d, untreated_outcome, treatment_effect, propensity):
         propensity (func): probability of treatment conditional on covariates
         """
         # Generate covariates
-        X = multivariate_normal(np.zeros(d), np.diag(np.ones(d)), n)
+        X = cls.random_state.multivariate_normal(np.zeros(d), np.diag(np.ones(d)), n)
         # Generate treatment
-        T = np.apply_along_axis(lambda x: binomial(1, propensity(x), 1)[0], 1, X)
+        T = np.apply_along_axis(lambda x: cls.random_state.binomial(1, propensity(x), 1)[0], 1, X)
         # Calculate outcome
         Y0 = np.apply_along_axis(lambda x: untreated_outcome(x), 1, X)
         treat_effect = np.apply_along_axis(lambda x: treatment_effect(x), 1, X)
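
The test changes above all follow one pattern: replace module-level numpy.random draws (multivariate_normal, normal, binomial) with calls on a single seeded np.random.RandomState, so every run of the test suite generates identical data. A small standalone sketch of that pattern (make_data is a hypothetical helper, not part of the test file):

import numpy as np

def make_data(random_state, n=5, d=3):
    # All randomness flows through the passed-in RandomState.
    X = random_state.multivariate_normal(np.zeros(d), np.eye(d), n)
    T = random_state.binomial(1, 0.5, n)
    return X, T

X1, T1 = make_data(np.random.RandomState(12345))
X2, T2 = make_data(np.random.RandomState(12345))
assert np.array_equal(X1, X2) and np.array_equal(T1, T2)  # same seed, same draws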

notebooks/Double Machine Learning Examples.ipynb

Lines changed: 989 additions & 0 deletions
Large diffs are not rendered by default.
