facebookresearch · luisenp · Oct 28, 2022 · Oct 22, 2022 · Oct 22, 2022 · Oct 27, 2022
diff --git a/theseus/core/objective.py b/theseus/core/objective.py
@@ -509,9 +509,7 @@ def _get_batch_size(batch_sizes: Sequence[int]) -> int:
         self._batch_size = _get_batch_size(batch_sizes)
 
     def _vectorization_needs_update(self):
-        num_updates = dict(
-            (name, v._num_updates) for name, v in self._all_variables.items()
-        )
+        num_updates = {name: v._num_updates for name, v in self._all_variables.items()}
         needs = False
         if num_updates != self._num_updates_variables:
             self._num_updates_variables = num_updates
@@ -575,6 +573,11 @@ def retract_optim_vars(
         self._retract_method(
             delta, ordering, ignore_mask=ignore_mask, force_update=force_update
         )
+        # Updating immediately is useful, since it will keep grad history if
+        # needed. Otherwise, with lazy waiting we can be in a situation where
+        # vectorization is updated with torch.no_grad() (e.g., for error logging),
+        # and then it has to be run again later when grad is back on.
+        self.update_vectorization_if_needed()
 
     def _enable_vectorization(
         self,

diff --git a/theseus/core/vectorizer.py b/theseus/core/vectorizer.py
@@ -228,7 +228,11 @@ def _update_all_cost_fns_var_tensors(
                 # when updating the vectorized variable containers.
                 tensor = (
                     var.tensor
-                    if (var_batch_size > 1 or Vectorize._SHARED_TOKEN in name)
+                    if (
+                        var_batch_size > 1
+                        or objective_batch_size == 1
+                        or Vectorize._SHARED_TOKEN in name
+                    )
                     else Vectorize._expand(var.tensor, objective_batch_size)
                 )
                 names_to_tensors[name].append(tensor)