diff --git a/deprl/dep_controller.py b/deprl/dep_controller.py index 9161f31..1cc80f5 100644 --- a/deprl/dep_controller.py +++ b/deprl/dep_controller.py @@ -11,7 +11,7 @@ class DEP: """ DEP Implementation from Der et al.(2015). - Jax is used instead of numpy to speed up computation, GPU strongly + PyTorch is used instead of numpy to speed up computation, GPU strongly recommended. In the future, proper JAX features such as jit, vmap, etc should be used. diff --git a/examples/example_only_dep_myosuite.py b/examples/example_only_dep_myosuite.py new file mode 100644 index 0000000..ae69dce --- /dev/null +++ b/examples/example_only_dep_myosuite.py @@ -0,0 +1,25 @@ +import gym +import myosuite +import time +from deprl import env_wrappers +from deprl.dep_controller import DEP + + + +env = gym.make('myoLegWalk-v0') +env = env_wrappers.GymWrapper(env) + +# Alternatively, wrap the environment with SconeWrapper instead of GymWrapper: +# env = env_wrappers.SconeWrapper(env) +dep = DEP() +dep.initialize(env.observation_space, env.action_space) + +env.reset() +for i in range(1000): + action = dep.step(env.muscle_lengths())[0,:] + print(action.shape) + next_state, reward, done, _ = env.step(action) + time.sleep(0.01) + env.mj_render() + + diff --git a/examples/example_only_dep_scone.py b/examples/example_only_dep_scone.py new file mode 100644 index 0000000..fa0a93a --- /dev/null +++ b/examples/example_only_dep_scone.py @@ -0,0 +1,49 @@ +import gym +import sconegym +from deprl import env_wrappers +from deprl.dep_controller import DEP + + +# create the sconegym environment +env = gym.make('sconewalk_h2190-v1') + +# wrap the environment with deprl's SconeWrapper +env = env_wrappers.SconeWrapper(env) + +# create DEP, parameters are loaded from default path +dep = DEP() + +# give DEP the observation and action spaces to create the right dimensions +dep.initialize(env.observation_space, env.action_space) + +env.seed(0) + +for ep in range(5): + if ep % 1 == 0: + env.store_next_episode() # ep % 1 is always 0, so every episode is stored; use a larger modulus to store only every Nth + + ep_steps = 0 + ep_tot_reward = 0 +
state = env.reset() + + while True: + # compute the DEP action from the current muscle lengths; [0,:] keeps the first row of the result + action = dep.step(env.muscle_lengths())[0,:] + # applies action and advances environment by one step + state, reward, done, info = env.step(action) + + ep_steps += 1 + ep_tot_reward += reward + + # end the episode when the environment reports done or after 1000 steps + if done or (ep_steps >= 1000): + print( + f"Episode {ep} ending; steps={ep_steps}; reward={ep_tot_reward:0.3f}; \ + com={env.model.com_pos()}" + ) + env.write_now() + env.reset() + break + +env.close() +