Merge pull request #12 from martius-lab/gymnasium_update
Gymnasium update
P-Schumacher authored Aug 14, 2024
2 parents a8099c3 + 8e95a42 commit 84c3579
Showing 25 changed files with 491 additions and 488 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-app.yml
@@ -70,7 +70,7 @@ jobs:
python3 -m pip install --upgrade pip
pip3 install -e .
pip3 install -r requirements.txt
pip3 install myosuite==2.1.3
pip3 install myosuite==2.5.0
pip3 install pytest
- name: Run Test environment
2 changes: 2 additions & 0 deletions deprl/custom_distributed.py
@@ -264,6 +264,8 @@ def distribute(
env=environment, parallel=parallel, sequential=sequential
)

if "header" in tonic_conf:
exec(tonic_conf["header"])
dummy_environment = build_env_from_dict(build_dict)
max_episode_steps = dummy_environment._max_episode_steps
del dummy_environment
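The two new lines follow tonic's `header` convention: an experiment config may carry an import string that has to run before any environment is built. A minimal sketch of the mechanism, using the header string from the MyoSuite configs in this PR (the surrounding script is illustrative, not deprl code):

# Illustrative only: run a config-supplied header before building environments.
tonic_conf = {"header": "import deprl, myosuite; from myosuite.utils import gym"}

if "header" in tonic_conf:
    exec(tonic_conf["header"])  # at module scope this binds `gym` and registers the Myo envs

env = gym.make("myoLegWalk-v0")  # would raise NameError without the header above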
2 changes: 1 addition & 1 deletion deprl/dep_controller.py
@@ -2,7 +2,7 @@
import os
from collections import deque

import gym
import gymnasium as gym
import torch

torch.set_default_dtype(torch.float32)
5 changes: 4 additions & 1 deletion deprl/env_wrappers/gym_wrapper.py
@@ -57,4 +57,7 @@ def muscle_activity(self):

@property
def _max_episode_steps(self):
return self.unwrapped.max_episode_steps
if hasattr(self.unwrapped, "max_episode_steps"):
return self.unwrapped.max_episode_steps
else:
return self.unwrapped.horizon
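The property now covers both environment families: those whose unwrapped object exposes `max_episode_steps` and those that only expose `horizon`. A small self-contained sketch of the same fallback (the dummy classes are illustrative, not deprl code):

class EnvWithHorizon:
    horizon = 1000           # only exposes `horizon`

class EnvWithMaxSteps:
    max_episode_steps = 500  # exposes `max_episode_steps` directly

def episode_limit(env):
    # Same fallback as the wrapper property above.
    if hasattr(env, "max_episode_steps"):
        return env.max_episode_steps
    return env.horizon

assert episode_limit(EnvWithMaxSteps()) == 500
assert episode_limit(EnvWithHorizon()) == 1000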
9 changes: 8 additions & 1 deletion deprl/env_wrappers/scone_wrapper.py
@@ -72,7 +72,14 @@ def _inner_step(self, action):
done = self.unwrapped._get_done()
self.unwrapped.time += self.step_size
self.unwrapped.total_reward += reward
return obs, reward, done, {}
truncated = (
self.unwrapped.time / self.step_size
) < self._max_episode_steps
return obs, reward, done, truncated, {}

def reset(self, *args, **kwargs):
obs = super().reset()
return obs, obs

@property
def _max_episode_steps(self):
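The wrapper now follows the Gymnasium step API, which replaces the single `done` flag with separate `terminated` and `truncated` signals and returns a 5-tuple; `reset` correspondingly returns a pair. A minimal, standalone sketch of consuming that API (the environment id is illustrative):

import gymnasium as gym

env = gym.make("Pendulum-v1")
obs, info = env.reset(seed=0)  # reset() returns (observation, info)
for _ in range(200):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:  # either flag ends the episode
        obs, info = env.reset()
env.close()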
14 changes: 11 additions & 3 deletions deprl/env_wrappers/wrappers.py
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod

import gym
import gymnasium as gym
import numpy as np

import deprl # noqa
@@ -89,6 +89,8 @@ def __init__(self, *args, **kwargs):

def reset(self, **kwargs):
observation = super().reset(**kwargs)
if len(observation) == 2 and type(observation) is tuple:
observation = observation[0]
if not np.any(np.isnan(observation)):
self.last_observation = observation.copy()
else:
@@ -97,10 +99,16 @@ def reset(self, **kwargs):

def step(self, action):
try:
observation, reward, done, info = self._inner_step(action)
(
observation,
reward,
terminated,
truncated,
info,
) = self._inner_step(action)
if np.any(np.isnan(observation)):
raise self.error("NaN detected! Resetting.")

done = terminated or truncated
except self.error as e:
logger.log(f"Simulator exception thrown: {e}")
observation = self.last_observation
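The exception wrapper now unpacks the five-element tuple from `_inner_step` and folds `terminated`/`truncated` back into a single `done` for the NaN-recovery logic, while `reset` strips the `(obs, info)` pair that Gymnasium returns. The same normalization in isolation (a sketch, not deprl's exact code):

def normalize_reset(result):
    # Gymnasium returns (obs, info); classic Gym returned obs alone.
    if type(result) is tuple and len(result) == 2:
        return result[0]
    return result

def normalize_step(result):
    # Collapse (obs, reward, terminated, truncated, info) into a 4-tuple with `done`.
    obs, reward, terminated, truncated, info = result
    return obs, reward, terminated or truncated, info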
2 changes: 1 addition & 1 deletion deprl/log.py
@@ -2,9 +2,9 @@
import os
import time

import wandb
import yaml

import wandb
from deprl.vendor.tonic import utils


28 changes: 22 additions & 6 deletions deprl/vendor/tonic/environments/builders.py
@@ -3,36 +3,49 @@
import os
from types import SimpleNamespace

import gym.wrappers
import numpy as np
from gymnasium import wrappers

try:
from myosuite.utils import gym
except ModuleNotFoundError:
pass


from deprl.vendor.tonic import environments
from deprl.vendor.tonic.utils import logger


def gym_environment(*args, **kwargs):
"""Returns a wrapped Gym environment."""
if "header" in kwargs:
kwargs.pop("header")

def _builder(*args, **kwargs):
return gym.make(*args, **kwargs)

return build_environment(_builder, *args, **kwargs)
return build_environment(_builder, *args, **kwargs, header=None)


def bullet_environment(*args, **kwargs):
"""Returns a wrapped PyBullet environment."""
if "header" in kwargs:
kwargs.pop("header")

def _builder(*args, **kwargs):
import pybullet_envs # noqa

return gym.make(*args, **kwargs)

return build_environment(_builder, *args, **kwargs)
return build_environment(_builder, *args, **kwargs, header=None)


def control_suite_environment(*args, **kwargs):
"""Returns a wrapped Control Suite environment."""

if "header" in kwargs:
kwargs.pop("header")

def _builder(name, *args, **kwargs):
domain, task = name.split("-")
environment = ControlSuiteEnvironment(
Expand All @@ -42,9 +55,9 @@ def _builder(name, *args, **kwargs):
environment.spec = SimpleNamespace(
max_episode_steps=time_limit, id="ostrichrl-dmcontrol"
)
return gym.wrappers.TimeLimit(environment, time_limit)
return wrappers.TimeLimit(environment, time_limit)

return build_environment(_builder, *args, **kwargs)
return build_environment(_builder, *args, **kwargs, header=None)


def build_environment(
Expand All @@ -54,6 +67,7 @@ def build_environment(
time_feature=False,
max_episode_steps="default",
scaled_actions=True,
header=None,
*args,
**kwargs,
):
Expand All @@ -62,6 +76,8 @@ def build_environment(
time_feature=True, see https://arxiv.org/pdf/1712.00378.pdf for more
details.
"""
if header is not None:
exec(header)

# Build the environment.
environment = builder(name, *args, **kwargs)
Expand All @@ -81,7 +97,7 @@ def build_environment(

# Remove the TimeLimit wrapper if needed.
if not terminal_timeouts:
if type(environment) == gym.wrappers.TimeLimit:
if type(environment) == wrappers.TimeLimit:
environment = environment.env

# Add time as a feature if needed.
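In the builders, each public entry point now strips the `header` kwarg before delegating, `build_environment` executes it before the environment is constructed, and `TimeLimit` comes from `gymnasium.wrappers`. A reduced sketch of that control flow (names follow the diff; the body is simplified and omits action scaling and the time feature):

from gymnasium import wrappers

def build_environment_sketch(builder, name, header=None, terminal_timeouts=True, **kwargs):
    # The header is executed for its side effects, e.g. importing packages
    # ("import sconegym", "import myosuite") that register their environments.
    if header is not None:
        exec(header)
    environment = builder(name, **kwargs)  # typically gym.make(name, **kwargs)
    # Drop Gymnasium's TimeLimit wrapper when timeouts must not terminate episodes.
    if not terminal_timeouts and type(environment) == wrappers.TimeLimit:
        environment = environment.env
    return environment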
2 changes: 1 addition & 1 deletion deprl/vendor/tonic/environments/wrappers.py
@@ -1,6 +1,6 @@
"""Environment wrappers."""

import gym
import gymnasium as gym
import numpy as np


9 changes: 4 additions & 5 deletions examples/example_load_baseline_myosuite.py
@@ -3,13 +3,14 @@

import time

import gym
import myosuite # noqa
from myosuite.utils import gym

import deprl
from deprl import env_wrappers

# create the sconegym env
env = gym.make("myoChallengeChaseTagP1-v0")
env = gym.make("myoLegWalk-v0", reset_type="random")
env = env_wrappers.GymWrapper(env)
policy = deprl.load_baseline(env)

env.seed(0)
@@ -36,5 +37,3 @@
)
env.reset()
break

env.close()
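Condensed, the updated example registers the Myo environments, builds the env through MyoSuite's bundled gym shim, and rolls out the pretrained baseline. A sketch of the essential calls, assuming the wrapper keeps the single-observation `reset` and 4-tuple `step` interface that wrappers.py above restores:

import myosuite  # noqa  (registers the Myo environments)
from myosuite.utils import gym

import deprl
from deprl import env_wrappers

env = env_wrappers.GymWrapper(gym.make("myoLegWalk-v0", reset_type="random"))
policy = deprl.load_baseline(env)

env.seed(0)
obs = env.reset()
for _ in range(1000):
    action = policy(obs)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
env.close()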
2 changes: 1 addition & 1 deletion examples/example_only_dep_myosuite.py
@@ -1,7 +1,7 @@
import time

import gym
import myosuite # noqa
from myosuite.utils import gym

from deprl import env_wrappers
from deprl.dep_controller import DEP
6 changes: 3 additions & 3 deletions experiments/hyfydy/scone_walk_opensim_h0918.yaml
@@ -2,7 +2,7 @@ tonic:
after_training: ''
header: "import deprl, gym, sconegym"
agent: "deprl.custom_agents.dep_factory(3, deprl.custom_mpo_torch.TunedMPO())(replay=deprl.custom_replay_buffers.AdaptiveEnergyBuffer(return_steps=1,
batch_size=256, steps_between_batches=1000, batch_iterations=30, steps_before_batches=2e5,
batch_size=256, steps_between_batches=1000, batch_iterations=30, steps_before_batches=1000,
num_acts=18))"
before_training: ''
checkpoint: "last"
@@ -11,8 +11,8 @@ tonic:
name: "sconewalk_h0918_osimv1"
resume: true
seed: 0
parallel: 20
sequential: 10
parallel: 1
sequential: 1
test_environment: null
trainer: "deprl.custom_trainer.Trainer(steps=int(5e8), epoch_steps=int(2e5), save_steps=int(1e6))"

2 changes: 1 addition & 1 deletion experiments/myosuite_training_files/myoChaseTag.yaml
@@ -29,7 +29,7 @@ tonic:
reset_type='random')
environment_name: deprl_baseline_chasetag
full_save: 1
header: import deprl, gym, myosuite
header: import deprl, myosuite; from myosuite.utils import gym
name: myoChasetag
parallel: 20
path: ./output
2 changes: 1 addition & 1 deletion experiments/myosuite_training_files/myoLegWalk.yaml
@@ -28,7 +28,7 @@ tonic:
environment: deprl.environments.Gym('myoLegWalk-v0', scaled_actions=False, reset_type='random')
environment_name: deprl_baseline
full_save: 1
header: import deprl, gym, myosuite
header: import deprl, myosuite; from myosuite.utils import gym
name: myoLeg
parallel: 20
resume: 1
2 changes: 1 addition & 1 deletion experiments/myosuite_training_files/myoRelocate.yaml
@@ -23,7 +23,7 @@ tonic:
environment: deprl.environments.Gym('myoChallengeRelocateP1-v0', scaled_actions=False)
environment_name: deprl_baseline_relocate
full_save: 1
header: import deprl, gym, myosuite
header: import deprl, myosuite; from myosuite.utils import gym
name: Relocate
parallel: 20
resume: 1
39 changes: 39 additions & 0 deletions experiments/myosuite_training_files/myoRunTrack.yaml
@@ -0,0 +1,39 @@
DEP:
bias_rate: 0.002
buffer_size: 200
intervention_length: 5
intervention_proba: 0.0004
kappa: 1169.7
normalization: independent
q_norm_selector: l2
regularization: 32
s4avg: 2
sensor_delay: 1
tau: 40
test_episode_every: 3
time_dist: 5
with_learning: true
env_args: {}
mpo_args:
hidden_size: 1024
lr_actor: 3.53e-05
lr_critic: 6.081e-05
lr_dual: 0.00213
tonic:
after_training: ''
agent: deprl.custom_agents.dep_factory(3, deprl.custom_mpo_torch.TunedMPO())(replay=deprl.replays.buffers.Buffer(return_steps=3,
batch_size=256, steps_between_batches=1000, batch_iterations=30, steps_before_batches=2e5))
before_training: ''
checkpoint: last
environment: deprl.environments.Gym('myoChallengeRunTrackP1-v0', scaled_actions=False)
environment_name: deprl_baseline_runtrack
full_save: 1
header: import deprl, myosuite; from myosuite.utils import gym
name: myoLeg
parallel: 20
resume: 1
seed: 0
sequential: 10
test_environment: null
trainer: deprl.custom_trainer.Trainer(steps=int(1e8), epoch_steps=int(2e5), save_steps=int(1e6))
working_dir: ./baselines_DEPRL
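A hedged note on how such a config is consumed: following the tonic convention that deprl vendors, the `header` string is exec'd and the `environment` string is eval'd when training starts. A minimal sketch of that loading step (the YAML path follows this commit; the loader code itself is illustrative, not deprl's actual entry point):

import yaml

with open("experiments/myosuite_training_files/myoRunTrack.yaml") as f:
    conf = yaml.safe_load(f)

exec(conf["tonic"]["header"])                     # "import deprl, myosuite; from myosuite.utils import gym"
environment = eval(conf["tonic"]["environment"])  # deprl.environments.Gym('myoChallengeRunTrackP1-v0', ...)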